diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-26 10:49:23 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2023-07-26 10:49:23 +0100 |
commit | 537a7180a0d5132c11636c4fd8b1450cd99d352c (patch) | |
tree | fb9f8c84955a411bdffbd6371ea32f2716fb3687 /src/libmime | |
parent | 5fd7a90fdaa33f52c59bdb0ca84451e5c1e22365 (diff) | |
download | rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.tar.gz rspamd-537a7180a0d5132c11636c4fd8b1450cd99d352c.zip |
[Rework] Use clang-format to unify formatting in all sources
No meaningful changes.
Diffstat (limited to 'src/libmime')
32 files changed, 6735 insertions, 6481 deletions
diff --git a/src/libmime/archives.c b/src/libmime/archives.c index 2cf054035..ea0ea551d 100644 --- a/src/libmime/archives.c +++ b/src/libmime/archives.c @@ -24,43 +24,43 @@ #include <unicode/utf16.h> #include <unicode/ucnv.h> -#define msg_debug_archive(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) +#define msg_debug_archive(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(archive) static void -rspamd_archive_dtor (gpointer p) +rspamd_archive_dtor(gpointer p) { struct rspamd_archive *arch = p; struct rspamd_archive_file *f; guint i; - for (i = 0; i < arch->files->len; i ++) { - f = g_ptr_array_index (arch->files, i); + for (i = 0; i < arch->files->len; i++) { + f = g_ptr_array_index(arch->files, i); if (f->fname) { - g_string_free (f->fname, TRUE); + g_string_free(f->fname, TRUE); } - g_free (f); + g_free(f); } - g_ptr_array_free (arch->files, TRUE); + g_ptr_array_free(arch->files, TRUE); } static bool -rspamd_archive_file_try_utf (struct rspamd_task *task, - struct rspamd_archive *arch, - struct rspamd_archive_file *fentry, - const gchar *in, gsize inlen) +rspamd_archive_file_try_utf(struct rspamd_task *task, + struct rspamd_archive *arch, + struct rspamd_archive_file *fentry, + const gchar *in, gsize inlen) { const gchar *charset = NULL, *p, *end; GString *res; - charset = rspamd_mime_charset_find_by_content (in, inlen, TRUE); + charset = rspamd_mime_charset_find_by_content(in, inlen, TRUE); if (charset) { UChar *tmp; @@ -69,26 +69,26 @@ rspamd_archive_file_try_utf (struct rspamd_task *task, struct rspamd_charset_converter *conv; UConverter *utf8_converter; - conv = rspamd_mime_get_converter_cached (charset, task->task_pool, - TRUE, &uc_err); - utf8_converter = rspamd_get_utf8_converter (); + conv = rspamd_mime_get_converter_cached(charset, task->task_pool, + 
TRUE, &uc_err); + utf8_converter = rspamd_get_utf8_converter(); if (conv == NULL) { - msg_info_task ("cannot open converter for %s: %s", - charset, u_errorName (uc_err)); + msg_info_task("cannot open converter for %s: %s", + charset, u_errorName(uc_err)); fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; fentry->fname = g_string_new_len(in, inlen); return false; } - tmp = g_malloc (sizeof (*tmp) * (inlen + 1)); - r = rspamd_converter_to_uchars (conv, tmp, inlen + 1, - in, inlen, &uc_err); - if (!U_SUCCESS (uc_err)) { - msg_info_task ("cannot convert data to unicode from %s: %s", - charset, u_errorName (uc_err)); - g_free (tmp); + tmp = g_malloc(sizeof(*tmp) * (inlen + 1)); + r = rspamd_converter_to_uchars(conv, tmp, inlen + 1, + in, inlen, &uc_err); + if (!U_SUCCESS(uc_err)) { + msg_info_task("cannot convert data to unicode from %s: %s", + charset, u_errorName(uc_err)); + g_free(tmp); fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; fentry->fname = g_string_new_len(in, inlen); @@ -105,47 +105,48 @@ rspamd_archive_file_try_utf (struct rspamd_task *task, if (IS_ZERO_WIDTH_SPACE(uc) || u_iscntrl(uc)) { msg_info_task("control character in archive file name found: 0x%02xd " - "(filename=%T)", uc, arch->archive_name); + "(filename=%T)", + uc, arch->archive_name); fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; break; } } - clen = ucnv_getMaxCharSize (utf8_converter); - dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen); - res = g_string_sized_new (dlen); - r = ucnv_fromUChars (utf8_converter, res->str, dlen, tmp, r, &uc_err); + clen = ucnv_getMaxCharSize(utf8_converter); + dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen); + res = g_string_sized_new(dlen); + r = ucnv_fromUChars(utf8_converter, res->str, dlen, tmp, r, &uc_err); - if (!U_SUCCESS (uc_err)) { - msg_info_task ("cannot convert data from unicode from %s: %s", - charset, u_errorName (uc_err)); - g_free (tmp); - g_string_free (res, TRUE); + if (!U_SUCCESS(uc_err)) { + msg_info_task("cannot convert data from unicode from 
%s: %s", + charset, u_errorName(uc_err)); + g_free(tmp); + g_string_free(res, TRUE); fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; fentry->fname = g_string_new_len(in, inlen); return NULL; } - g_free (tmp); + g_free(tmp); res->len = r; - msg_debug_archive ("converted from %s to UTF-8 inlen: %z, outlen: %d", - charset, inlen, r); + msg_debug_archive("converted from %s to UTF-8 inlen: %z, outlen: %d", + charset, inlen, r); fentry->fname = res; } else { /* Convert unsafe characters to '?' */ - res = g_string_sized_new (inlen); + res = g_string_sized_new(inlen); p = in; end = in + inlen; while (p < end) { - if (g_ascii_isgraph (*p)) { - g_string_append_c (res, *p); + if (g_ascii_isgraph(*p)) { + g_string_append_c(res, *p); } else { - g_string_append_c (res, '?'); + g_string_append_c(res, '?'); if (*p < 0x7f && (g_ascii_iscntrl(*p) || *p == '\0')) { if (!(fentry->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED)) { @@ -157,7 +158,7 @@ rspamd_archive_file_try_utf (struct rspamd_task *task, } } - p ++; + p++; } fentry->fname = res; } @@ -166,8 +167,8 @@ rspamd_archive_file_try_utf (struct rspamd_task *task, } static void -rspamd_archive_process_zip (struct rspamd_task *task, - struct rspamd_mime_part *part) +rspamd_archive_process_zip(struct rspamd_task *task, + struct rspamd_mime_part *part) { const guchar *p, *start, *end, *eocd = NULL, *cd; const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46; @@ -189,7 +190,7 @@ rspamd_archive_process_zip (struct rspamd_task *task, */ p -= 21; - while (p > start + sizeof (guint32)) { + while (p > start + sizeof(guint32)) { guint32 t; if (processed > max_processed) { @@ -197,87 +198,87 @@ rspamd_archive_process_zip (struct rspamd_task *task, } /* XXX: not an efficient approach */ - memcpy (&t, p, sizeof (t)); + memcpy(&t, p, sizeof(t)); - if (GUINT32_FROM_LE (t) == eocd_magic) { + if (GUINT32_FROM_LE(t) == eocd_magic) { eocd = p; break; } - p --; - processed ++; + p--; + processed++; } if (eocd == NULL) { /* Not a zip file */ - 
msg_info_task ("zip archive is invalid (no EOCD)"); + msg_info_task("zip archive is invalid (no EOCD)"); return; } if (end - eocd < 21) { - msg_info_task ("zip archive is invalid (short EOCD)"); + msg_info_task("zip archive is invalid (short EOCD)"); return; } - memcpy (&cd_size, eocd + 12, sizeof (cd_size)); - cd_size = GUINT32_FROM_LE (cd_size); - memcpy (&cd_offset, eocd + 16, sizeof (cd_offset)); - cd_offset = GUINT32_FROM_LE (cd_offset); + memcpy(&cd_size, eocd + 12, sizeof(cd_size)); + cd_size = GUINT32_FROM_LE(cd_size); + memcpy(&cd_offset, eocd + 16, sizeof(cd_offset)); + cd_offset = GUINT32_FROM_LE(cd_offset); /* We need to check sanity as well */ - if (cd_offset + cd_size > (guint)(eocd - start)) { - msg_info_task ("zip archive is invalid (bad size/offset for CD)"); + if (cd_offset + cd_size > (guint) (eocd - start)) { + msg_info_task("zip archive is invalid (bad size/offset for CD)"); return; } cd = start + cd_offset; - arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); - arch->files = g_ptr_array_new (); + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); arch->type = RSPAMD_ARCHIVE_ZIP; if (part->cd) { arch->archive_name = &part->cd->filename; } - rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, - arch); + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); while (cd < start + cd_offset + cd_size) { guint16 flags; /* Read central directory record */ if (eocd - cd < cd_basic_len || - memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) { - msg_info_task ("zip archive is invalid (bad cd record)"); + memcmp(cd, cd_magic, sizeof(cd_magic)) != 0) { + msg_info_task("zip archive is invalid (bad cd record)"); return; } - memcpy (&flags, cd + 8, sizeof (guint16)); - flags = GUINT16_FROM_LE (flags); - memcpy (&comp_size, cd + 20, sizeof (guint32)); - comp_size = GUINT32_FROM_LE (comp_size); - memcpy (&uncomp_size, cd + 24, sizeof (guint32)); - uncomp_size = 
GUINT32_FROM_LE (uncomp_size); - memcpy (&fname_len, cd + 28, sizeof (fname_len)); - fname_len = GUINT16_FROM_LE (fname_len); - memcpy (&extra_len, cd + 30, sizeof (extra_len)); - extra_len = GUINT16_FROM_LE (extra_len); - memcpy (&comment_len, cd + 32, sizeof (comment_len)); - comment_len = GUINT16_FROM_LE (comment_len); + memcpy(&flags, cd + 8, sizeof(guint16)); + flags = GUINT16_FROM_LE(flags); + memcpy(&comp_size, cd + 20, sizeof(guint32)); + comp_size = GUINT32_FROM_LE(comp_size); + memcpy(&uncomp_size, cd + 24, sizeof(guint32)); + uncomp_size = GUINT32_FROM_LE(uncomp_size); + memcpy(&fname_len, cd + 28, sizeof(fname_len)); + fname_len = GUINT16_FROM_LE(fname_len); + memcpy(&extra_len, cd + 30, sizeof(extra_len)); + extra_len = GUINT16_FROM_LE(extra_len); + memcpy(&comment_len, cd + 32, sizeof(comment_len)); + comment_len = GUINT16_FROM_LE(comment_len); if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) { - msg_info_task ("zip archive is invalid (too large cd record)"); + msg_info_task("zip archive is invalid (too large cd record)"); return; } - f = g_malloc0 (sizeof (*f)); - rspamd_archive_file_try_utf (task, arch, f, cd + cd_basic_len, fname_len); + f = g_malloc0(sizeof(*f)); + rspamd_archive_file_try_utf(task, arch, f, cd + cd_basic_len, fname_len); f->compressed_size = comp_size; f->uncompressed_size = uncomp_size; @@ -291,11 +292,11 @@ rspamd_archive_process_zip (struct rspamd_task *task, arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; } - g_ptr_array_add (arch->files, f); - msg_debug_archive ("found file in zip archive: %v", f->fname); + g_ptr_array_add(arch->files, f); + msg_debug_archive("found file in zip archive: %v", f->fname); } else { - g_free (f); + g_free(f); return; } @@ -304,19 +305,19 @@ rspamd_archive_process_zip (struct rspamd_task *task, const guchar *extra = cd + fname_len + cd_basic_len; p = extra; - while (p + sizeof (guint16) * 2 < extra + extra_len) { + while (p + sizeof(guint16) * 2 < extra + extra_len) { guint16 
hid, hlen; - memcpy (&hid, p, sizeof (guint16)); - hid = GUINT16_FROM_LE (hid); - memcpy (&hlen, p + sizeof (guint16), sizeof (guint16)); - hlen = GUINT16_FROM_LE (hlen); + memcpy(&hid, p, sizeof(guint16)); + hid = GUINT16_FROM_LE(hid); + memcpy(&hlen, p + sizeof(guint16), sizeof(guint16)); + hlen = GUINT16_FROM_LE(hlen); if (hid == 0x0017) { f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; } - p += hlen + sizeof (guint16) * 2; + p += hlen + sizeof(guint16) * 2; } cd += fname_len + comment_len + extra_len + cd_basic_len; @@ -329,7 +330,7 @@ rspamd_archive_process_zip (struct rspamd_task *task, } static inline gint -rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res) +rspamd_archive_rar_read_vint(const guchar *start, gsize remain, guint64 *res) { /* * From http://www.rarlab.com/technote.htm: @@ -346,82 +347,87 @@ rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res) while (remain > 0 && shift <= 57) { if (*p & 0x80) { - t |= ((guint64)(*p & 0x7f)) << shift; + t |= ((guint64) (*p & 0x7f)) << shift; } else { - t |= ((guint64)(*p & 0x7f)) << shift; - p ++; + t |= ((guint64) (*p & 0x7f)) << shift; + p++; break; } shift += 7; p++; - remain --; + remain--; } if (remain == 0 || shift > 64) { return -1; } - *res = GUINT64_FROM_LE (t); + *res = GUINT64_FROM_LE(t); return p - start; } -#define RAR_SKIP_BYTES(n) do { \ - if ((n) <= 0) { \ - msg_debug_archive ("rar archive is invalid (bad skip value)"); \ - return; \ - } \ - if ((gsize)(end - p) < (n)) { \ - msg_debug_archive ("rar archive is invalid (truncated)"); \ - return; \ - } \ - p += (n); \ -} while (0) - -#define RAR_READ_VINT() do { \ - r = rspamd_archive_rar_read_vint (p, end - p, &vint); \ - if (r == -1) { \ - msg_debug_archive ("rar archive is invalid (bad vint)"); \ - return; \ - } \ - else if (r == 0) { \ - msg_debug_archive ("rar archive is invalid (BAD vint offset)"); \ - return; \ - }\ -} while (0) - -#define RAR_READ_VINT_SKIP() do { \ - r = 
rspamd_archive_rar_read_vint (p, end - p, &vint); \ - if (r == -1) { \ - msg_debug_archive ("rar archive is invalid (bad vint)"); \ - return; \ - } \ - p += r; \ -} while (0) - -#define RAR_READ_UINT16(n) do { \ - if (end - p < (glong)sizeof (guint16)) { \ - msg_debug_archive ("rar archive is invalid (bad int16)"); \ - return; \ - } \ - n = p[0] + (p[1] << 8); \ - p += sizeof (guint16); \ -} while (0) - -#define RAR_READ_UINT32(n) do { \ - if (end - p < (glong)sizeof (guint32)) { \ - msg_debug_archive ("rar archive is invalid (bad int32)"); \ - return; \ - } \ - n = (guint)p[0] + ((guint)p[1] << 8) + ((guint)p[2] << 16) + ((guint)p[3] << 24); \ - p += sizeof (guint32); \ -} while (0) +#define RAR_SKIP_BYTES(n) \ + do { \ + if ((n) <= 0) { \ + msg_debug_archive("rar archive is invalid (bad skip value)"); \ + return; \ + } \ + if ((gsize) (end - p) < (n)) { \ + msg_debug_archive("rar archive is invalid (truncated)"); \ + return; \ + } \ + p += (n); \ + } while (0) + +#define RAR_READ_VINT() \ + do { \ + r = rspamd_archive_rar_read_vint(p, end - p, &vint); \ + if (r == -1) { \ + msg_debug_archive("rar archive is invalid (bad vint)"); \ + return; \ + } \ + else if (r == 0) { \ + msg_debug_archive("rar archive is invalid (BAD vint offset)"); \ + return; \ + } \ + } while (0) + +#define RAR_READ_VINT_SKIP() \ + do { \ + r = rspamd_archive_rar_read_vint(p, end - p, &vint); \ + if (r == -1) { \ + msg_debug_archive("rar archive is invalid (bad vint)"); \ + return; \ + } \ + p += r; \ + } while (0) + +#define RAR_READ_UINT16(n) \ + do { \ + if (end - p < (glong) sizeof(guint16)) { \ + msg_debug_archive("rar archive is invalid (bad int16)"); \ + return; \ + } \ + n = p[0] + (p[1] << 8); \ + p += sizeof(guint16); \ + } while (0) + +#define RAR_READ_UINT32(n) \ + do { \ + if (end - p < (glong) sizeof(guint32)) { \ + msg_debug_archive("rar archive is invalid (bad int32)"); \ + return; \ + } \ + n = (guint) p[0] + ((guint) p[1] << 8) + ((guint) p[2] << 16) + ((guint) p[3] << 24); 
\ + p += sizeof(guint32); \ + } while (0) static void -rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, - const guchar *end, struct rspamd_mime_part *part) +rspamd_archive_process_rar_v4(struct rspamd_task *task, const guchar *start, + const guchar *end, struct rspamd_mime_part *part) { const guchar *p = start, *start_section; guint8 type; @@ -430,22 +436,22 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, struct rspamd_archive *arch; struct rspamd_archive_file *f; - arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); - arch->files = g_ptr_array_new (); + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); arch->type = RSPAMD_ARCHIVE_RAR; if (part->cd) { arch->archive_name = &part->cd->filename; } - rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, - arch); + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); while (p < end) { /* Crc16 */ start_section = p; - RAR_SKIP_BYTES (sizeof (guint16)); + RAR_SKIP_BYTES(sizeof(guint16)); type = *p; - p ++; - RAR_READ_UINT16 (flags); + p++; + RAR_READ_UINT16(flags); if (type == 0x73) { /* Main header, check for encryption */ @@ -455,13 +461,13 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, } } - RAR_READ_UINT16 (sz); + RAR_READ_UINT16(sz); if (flags & 0x8000) { /* We also need to read ADD_SIZE element */ guint32 tmp; - RAR_READ_UINT32 (tmp); + RAR_READ_UINT32(tmp); sz += tmp; /* This is also used as PACK_SIZE */ comp_sz = tmp; @@ -469,7 +475,7 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, if (sz == 0) { /* Zero sized block - error */ - msg_debug_archive ("rar archive is invalid (zero size block)"); + msg_debug_archive("rar archive is invalid (zero size block)"); return; } @@ -479,58 +485,58 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, /* File header */ /* 
Uncompressed size */ - RAR_READ_UINT32 (uncomp_sz); + RAR_READ_UINT32(uncomp_sz); /* Skip to NAME_SIZE element */ - RAR_SKIP_BYTES (11); - RAR_READ_UINT16 (fname_len); + RAR_SKIP_BYTES(11); + RAR_READ_UINT16(fname_len); - if (fname_len == 0 || fname_len > (gsize)(end - p)) { - msg_debug_archive ("rar archive is invalid (bad filename size: %d)", - fname_len); + if (fname_len == 0 || fname_len > (gsize) (end - p)) { + msg_debug_archive("rar archive is invalid (bad filename size: %d)", + fname_len); return; } /* Attrs */ - RAR_SKIP_BYTES (4); + RAR_SKIP_BYTES(4); if (flags & 0x100) { /* We also need to read HIGH_PACK_SIZE */ guint32 tmp; - RAR_READ_UINT32 (tmp); + RAR_READ_UINT32(tmp); sz += tmp; comp_sz += tmp; /* HIGH_UNP_SIZE */ - RAR_READ_UINT32 (tmp); + RAR_READ_UINT32(tmp); uncomp_sz += tmp; } - f = g_malloc0 (sizeof (*f)); + f = g_malloc0(sizeof(*f)); if (flags & 0x200) { /* We have unicode + normal version */ guchar *tmp; - tmp = memchr (p, '\0', fname_len); + tmp = memchr(p, '\0', fname_len); if (tmp != NULL) { /* Just use ASCII version */ - rspamd_archive_file_try_utf (task, arch, f, p, tmp - p); - msg_debug_archive ("found ascii filename in rarv4 archive: %v", - f->fname); + rspamd_archive_file_try_utf(task, arch, f, p, tmp - p); + msg_debug_archive("found ascii filename in rarv4 archive: %v", + f->fname); } else { /* We have UTF8 filename, use it as is */ - rspamd_archive_file_try_utf (task, arch, f, p, fname_len); - msg_debug_archive ("found utf filename in rarv4 archive: %v", - f->fname); + rspamd_archive_file_try_utf(task, arch, f, p, fname_len); + msg_debug_archive("found utf filename in rarv4 archive: %v", + f->fname); } } else { - rspamd_archive_file_try_utf (task, arch, f, p, fname_len); - msg_debug_archive ("found ascii (old) filename in rarv4 archive: %v", - f->fname); + rspamd_archive_file_try_utf(task, arch, f, p, fname_len); + msg_debug_archive("found ascii (old) filename in rarv4 archive: %v", + f->fname); } f->compressed_size = comp_sz; @@ 
-544,15 +550,15 @@ rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start, if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; } - g_ptr_array_add (arch->files, f); + g_ptr_array_add(arch->files, f); } else { - g_free (f); + g_free(f); } } p = start_section; - RAR_SKIP_BYTES (sz); + RAR_SKIP_BYTES(sz); } end: @@ -562,16 +568,16 @@ end: } static void -rspamd_archive_process_rar (struct rspamd_task *task, - struct rspamd_mime_part *part) +rspamd_archive_process_rar(struct rspamd_task *task, + struct rspamd_mime_part *part) { const guchar *p, *end, *section_start; const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00}, - rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}; + rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}; const guint rar_encrypted_header = 4, rar_main_header = 1, - rar_file_header = 2; + rar_file_header = 2; guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0, - extra_sz = 0; + extra_sz = 0; struct rspamd_archive *arch; struct rspamd_archive_file *f; gint r; @@ -579,58 +585,58 @@ rspamd_archive_process_rar (struct rspamd_task *task, p = part->parsed_data.begin; end = p + part->parsed_data.len; - if ((gsize)(end - p) <= sizeof (rar_v5_magic)) { - msg_debug_archive ("rar archive is invalid (too small)"); + if ((gsize) (end - p) <= sizeof(rar_v5_magic)) { + msg_debug_archive("rar archive is invalid (too small)"); return; } - if (memcmp (p, rar_v5_magic, sizeof (rar_v5_magic)) == 0) { - p += sizeof (rar_v5_magic); + if (memcmp(p, rar_v5_magic, sizeof(rar_v5_magic)) == 0) { + p += sizeof(rar_v5_magic); } - else if (memcmp (p, rar_v4_magic, sizeof (rar_v4_magic)) == 0) { - p += sizeof (rar_v4_magic); + else if (memcmp(p, rar_v4_magic, sizeof(rar_v4_magic)) == 0) { + p += sizeof(rar_v4_magic); - rspamd_archive_process_rar_v4 (task, p, end, part); + rspamd_archive_process_rar_v4(task, p, end, part); return; } else { - 
msg_debug_archive ("rar archive is invalid (no rar magic)"); + msg_debug_archive("rar archive is invalid (no rar magic)"); return; } /* Rar v5 format */ - arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); - arch->files = g_ptr_array_new (); + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); arch->type = RSPAMD_ARCHIVE_RAR; if (part->cd) { arch->archive_name = &part->cd->filename; } - rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, - arch); + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); /* Now we can have either encryption header or archive header */ /* Crc 32 */ - RAR_SKIP_BYTES (sizeof (guint32)); + RAR_SKIP_BYTES(sizeof(guint32)); /* Size */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); sz = vint; /* Type */ section_start = p; - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); type = vint; /* Header flags */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); flags = vint; if (flags & 0x1) { /* Have extra zone */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); } if (flags & 0x2) { /* Data zone is presented */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); sz += vint; } @@ -640,56 +646,56 @@ rspamd_archive_process_rar (struct rspamd_task *task, goto end; } else if (type != rar_main_header) { - msg_debug_archive ("rar archive is invalid (bad main header)"); + msg_debug_archive("rar archive is invalid (bad main header)"); return; } /* Nothing useful in main header */ p = section_start; - RAR_SKIP_BYTES (sz); + RAR_SKIP_BYTES(sz); while (p < end) { gboolean has_extra = FALSE; /* Read the next header */ /* Crc 32 */ - RAR_SKIP_BYTES (sizeof (guint32)); + RAR_SKIP_BYTES(sizeof(guint32)); /* Size */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); sz = vint; if (sz == 0) { /* Zero sized block - error */ - msg_debug_archive ("rar archive is invalid (zero size block)"); + msg_debug_archive("rar archive is invalid (zero size block)"); return; } section_start 
= p; /* Type */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); type = vint; /* Header flags */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); flags = vint; if (flags & 0x1) { /* Have extra zone */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); extra_sz = vint; has_extra = TRUE; } if (flags & 0x2) { /* Data zone is presented */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); sz += vint; comp_sz = vint; } if (type != rar_file_header) { p = section_start; - RAR_SKIP_BYTES (sz); + RAR_SKIP_BYTES(sz); } else { /* We have a file header, go forward */ @@ -697,40 +703,40 @@ rspamd_archive_process_rar (struct rspamd_task *task, bool is_directory = false; /* File header specific flags */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); flags = vint; /* Unpacked size */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); uncomp_sz = vint; /* Attributes */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); if (flags & 0x2) { /* Unix mtime */ - RAR_SKIP_BYTES (sizeof (guint32)); + RAR_SKIP_BYTES(sizeof(guint32)); } if (flags & 0x4) { /* Crc32 */ - RAR_SKIP_BYTES (sizeof (guint32)); + RAR_SKIP_BYTES(sizeof(guint32)); } if (flags & 0x1) { /* Ignore directories for sanity purposes */ is_directory = true; - msg_debug_archive ("skip directory record in a rar archive"); + msg_debug_archive("skip directory record in a rar archive"); } if (!is_directory) { /* Compression */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); /* Host OS */ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); /* Filename length (finally!) 
*/ - RAR_READ_VINT_SKIP (); + RAR_READ_VINT_SKIP(); fname_len = vint; if (fname_len == 0 || fname_len > (gsize) (end - p)) { - msg_debug_archive ("rar archive is invalid (bad filename size)"); + msg_debug_archive("rar archive is invalid (bad filename size)"); return; } @@ -741,7 +747,7 @@ rspamd_archive_process_rar (struct rspamd_task *task, rspamd_archive_file_try_utf(task, arch, f, p, fname_len); if (f->fname) { - msg_debug_archive ("added rarv5 file: %v", f->fname); + msg_debug_archive("added rarv5 file: %v", f->fname); g_ptr_array_add(arch->files, f); if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; @@ -763,7 +769,7 @@ rspamd_archive_process_rar (struct rspamd_task *task, r = rspamd_archive_rar_read_vint(ex, extra_sz, &cur_sz); if (r == -1) { - msg_debug_archive ("rar archive is invalid (bad vint)"); + msg_debug_archive("rar archive is invalid (bad vint)"); return; } @@ -771,7 +777,7 @@ rspamd_archive_process_rar (struct rspamd_task *task, r = rspamd_archive_rar_read_vint(t, extra_sz - r, &sec_type); if (r == -1) { - msg_debug_archive ("rar archive is invalid (bad vint)"); + msg_debug_archive("rar archive is invalid (bad vint)"); return; } @@ -788,7 +794,7 @@ rspamd_archive_process_rar (struct rspamd_task *task, /* Restore p to the beginning of the header */ p = section_start; - RAR_SKIP_BYTES (sz); + RAR_SKIP_BYTES(sz); } } @@ -799,7 +805,7 @@ end: } static inline gint -rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res) +rspamd_archive_7zip_read_vint(const guchar *start, gsize remain, guint64 *res) { /* * REAL_UINT64 means real UINT64. 
@@ -824,17 +830,17 @@ rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res) t = *start; - if (!isset (&t, 7)) { + if (!isset(&t, 7)) { /* Trivial case */ *res = t; return 1; } else if (t == 0xFF) { - if (remain >= sizeof (guint64) + 1) { - memcpy (res, start + 1, sizeof (guint64)); - *res = GUINT64_FROM_LE (*res); + if (remain >= sizeof(guint64) + 1) { + memcpy(res, start + 1, sizeof(guint64)); + *res = GUINT64_FROM_LE(*res); - return sizeof (guint64) + 1; + return sizeof(guint64) + 1; } } else { @@ -843,64 +849,68 @@ rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res) guint64 tgt; while (cur_bit > 0) { - if (!isset (&t, cur_bit)) { + if (!isset(&t, cur_bit)) { if (remain >= intlen + 1) { - memcpy (&tgt, start + 1, intlen); - tgt = GUINT64_FROM_LE (tgt); + memcpy(&tgt, start + 1, intlen); + tgt = GUINT64_FROM_LE(tgt); /* Shift back */ - tgt >>= sizeof (tgt) - NBBY * intlen; + tgt >>= sizeof(tgt) - NBBY * intlen; /* Add masked value */ - tgt += (guint64)(t & (bmask >> (NBBY - cur_bit))) - << (NBBY * intlen); + tgt += (guint64) (t & (bmask >> (NBBY - cur_bit))) + << (NBBY * intlen); *res = tgt; return intlen + 1; } } - cur_bit --; - intlen ++; + cur_bit--; + intlen++; } } return -1; } -#define SZ_READ_VINT_SKIP() do { \ - r = rspamd_archive_7zip_read_vint (p, end - p, &vint); \ - if (r == -1) { \ - msg_debug_archive ("7z archive is invalid (bad vint)"); \ - return; \ - } \ - p += r; \ -} while (0) -#define SZ_READ_VINT(var) do { \ - int r; \ - r = rspamd_archive_7zip_read_vint (p, end - p, &(var)); \ - if (r == -1) { \ - msg_debug_archive ("7z archive is invalid (bad vint): %s", G_STRLOC); \ - return NULL; \ - } \ - p += r; \ -} while (0) - -#define SZ_READ_UINT64(n) do { \ - if (end - p < (goffset)sizeof (guint64)) { \ - msg_debug_archive ("7zip archive is invalid (bad uint64): %s", G_STRLOC); \ - return; \ - } \ - memcpy (&(n), p, sizeof (guint64)); \ - n = GUINT64_FROM_LE(n); \ - p += sizeof (guint64); \ -} 
while (0) -#define SZ_SKIP_BYTES(n) do { \ - if (end - p >= (n)) { \ - p += (n); \ - } \ - else { \ - msg_debug_archive ("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint)(n), (gint)(end - p), G_STRLOC); \ - return NULL; \ - } \ -} while (0) +#define SZ_READ_VINT_SKIP() \ + do { \ + r = rspamd_archive_7zip_read_vint(p, end - p, &vint); \ + if (r == -1) { \ + msg_debug_archive("7z archive is invalid (bad vint)"); \ + return; \ + } \ + p += r; \ + } while (0) +#define SZ_READ_VINT(var) \ + do { \ + int r; \ + r = rspamd_archive_7zip_read_vint(p, end - p, &(var)); \ + if (r == -1) { \ + msg_debug_archive("7z archive is invalid (bad vint): %s", G_STRLOC); \ + return NULL; \ + } \ + p += r; \ + } while (0) + +#define SZ_READ_UINT64(n) \ + do { \ + if (end - p < (goffset) sizeof(guint64)) { \ + msg_debug_archive("7zip archive is invalid (bad uint64): %s", G_STRLOC); \ + return; \ + } \ + memcpy(&(n), p, sizeof(guint64)); \ + n = GUINT64_FROM_LE(n); \ + p += sizeof(guint64); \ + } while (0) +#define SZ_SKIP_BYTES(n) \ + do { \ + if (end - p >= (n)) { \ + p += (n); \ + } \ + else { \ + msg_debug_archive("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint) (n), (gint) (end - p), G_STRLOC); \ + return NULL; \ + } \ + } while (0) enum rspamd_7zip_header_mark { kEnd = 0x00, @@ -932,19 +942,19 @@ enum rspamd_7zip_header_mark { }; -#define _7Z_CRYPTO_MAIN_ZIP 0x06F10101 /* Main Zip crypto algo */ -#define _7Z_CRYPTO_RAR_29 0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */ -#define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */ +#define _7Z_CRYPTO_MAIN_ZIP 0x06F10101 /* Main Zip crypto algo */ +#define _7Z_CRYPTO_RAR_29 0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */ +#define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */ #define IS_SZ_ENCRYPTED(codec_id) (((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \ - ((codec_id) == _7Z_CRYPTO_RAR_29) || \ - ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256)) + 
((codec_id) == _7Z_CRYPTO_RAR_29) || \ + ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256)) static const guchar * -rspamd_7zip_read_bits (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch, guint nbits, - guint *pbits_set) +rspamd_7zip_read_bits(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, guint nbits, + guint *pbits_set) { unsigned mask = 0, avail = 0, i; gboolean bit_set = 0; @@ -959,7 +969,7 @@ rspamd_7zip_read_bits (struct rspamd_task *task, bit_set = (avail & mask) ? 1 : 0; if (bit_set && pbits_set) { - (*pbits_set) ++; + (*pbits_set)++; } mask >>= 1; @@ -969,11 +979,11 @@ rspamd_7zip_read_bits (struct rspamd_task *task, } static const guchar * -rspamd_7zip_read_digest (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch, - guint64 num_streams, - guint *pdigest_read) +rspamd_7zip_read_digest(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, + guint64 num_streams, + guint *pdigest_read) { guchar all_defined = *p; guint64 i; @@ -998,14 +1008,14 @@ rspamd_7zip_read_digest (struct rspamd_task *task, return NULL; } - p = rspamd_7zip_read_bits (task, p, end, arch, num_streams, &num_defined); + p = rspamd_7zip_read_bits(task, p, end, arch, num_streams, &num_defined); if (p == NULL) { return NULL; } } - for (i = 0; i < num_defined; i ++) { + for (i = 0; i < num_defined; i++) { SZ_SKIP_BYTES(sizeof(guint32)); } @@ -1017,9 +1027,9 @@ rspamd_7zip_read_digest (struct rspamd_task *task, } static const guchar * -rspamd_7zip_read_pack_info (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch) +rspamd_7zip_read_pack_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) { guint64 pack_pos = 0, pack_streams = 0, i, cur_sz; guint num_digests = 0; @@ -1046,7 +1056,7 @@ rspamd_7zip_read_pack_info (struct rspamd_task *task, 
while (p != NULL && p < end) { t = *p; SZ_SKIP_BYTES(1); - msg_debug_archive ("7zip: read pack info %xc", t); + msg_debug_archive("7zip: read pack info %xc", t); switch (t) { case kSize: @@ -1057,15 +1067,15 @@ rspamd_7zip_read_pack_info (struct rspamd_task *task, break; case kCRC: /* CRCs are more complicated */ - p = rspamd_7zip_read_digest (task, p, end, arch, pack_streams, - &num_digests); + p = rspamd_7zip_read_digest(task, p, end, arch, pack_streams, + &num_digests); break; case kEnd: goto end; break; default: p = NULL; - msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC); + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); goto end; break; } @@ -1077,15 +1087,15 @@ end: } static const guchar * -rspamd_7zip_read_folder (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch, guint *pnstreams, guint *ndigests) +rspamd_7zip_read_folder(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, guint *pnstreams, guint *ndigests) { guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0; - SZ_READ_VINT (ncoders); + SZ_READ_VINT(ncoders); - for (i = 0; i < ncoders && p != NULL && p < end; i ++) { + for (i = 0; i < ncoders && p != NULL && p < end; i++) { guint64 sz, tmp; guchar t; /* @@ -1110,7 +1120,7 @@ rspamd_7zip_read_folder (struct rspamd_task *task, * } */ t = *p; - SZ_SKIP_BYTES (1); + SZ_SKIP_BYTES(1); sz = t & 0xF; /* Codec ID */ tmp = 0; @@ -1119,53 +1129,53 @@ rspamd_7zip_read_folder (struct rspamd_task *task, tmp += p[j]; } - msg_debug_archive ("7zip: read codec id: %L", tmp); + msg_debug_archive("7zip: read codec id: %L", tmp); - if (IS_SZ_ENCRYPTED (tmp)) { + if (IS_SZ_ENCRYPTED(tmp)) { arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; } - SZ_SKIP_BYTES (sz); + SZ_SKIP_BYTES(sz); if (t & (1u << 4)) { /* Complex */ - SZ_READ_VINT (tmp); /* InStreams */ + SZ_READ_VINT(tmp); /* InStreams */ ninstreams += tmp; - SZ_READ_VINT (tmp); /* OutStreams */ + SZ_READ_VINT(tmp); /* 
OutStreams */ noutstreams += tmp; } else { /* XXX: is it correct ? */ - noutstreams ++; - ninstreams ++; + noutstreams++; + ninstreams++; } if (t & (1u << 5)) { /* Attributes ... */ - SZ_READ_VINT (tmp); /* Size of attrs */ - SZ_SKIP_BYTES (tmp); + SZ_READ_VINT(tmp); /* Size of attrs */ + SZ_SKIP_BYTES(tmp); } } if (noutstreams > 1) { /* BindPairs, WTF, huh */ - for (i = 0; i < noutstreams - 1; i ++) { + for (i = 0; i < noutstreams - 1; i++) { guint64 tmp; - SZ_READ_VINT (tmp); - SZ_READ_VINT (tmp); + SZ_READ_VINT(tmp); + SZ_READ_VINT(tmp); } } - gint64 npacked = (gint64)ninstreams - (gint64)noutstreams + 1; - msg_debug_archive ("7zip: instreams=%L, outstreams=%L, packed=%L", - ninstreams, noutstreams, npacked); + gint64 npacked = (gint64) ninstreams - (gint64) noutstreams + 1; + msg_debug_archive("7zip: instreams=%L, outstreams=%L, packed=%L", + ninstreams, noutstreams, npacked); if (npacked > 1) { /* Gah... */ - for (i = 0; i < npacked; i ++) { + for (i = 0; i < npacked; i++) { guint64 tmp; - SZ_READ_VINT (tmp); + SZ_READ_VINT(tmp); } } @@ -1176,10 +1186,10 @@ rspamd_7zip_read_folder (struct rspamd_task *task, } static const guchar * -rspamd_7zip_read_coders_info (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch, - guint *pnum_folders, guint *pnum_nodigest) +rspamd_7zip_read_coders_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, + guint *pnum_folders, guint *pnum_nodigest) { guint64 num_folders = 0, i, tmp; guchar t; @@ -1210,17 +1220,17 @@ rspamd_7zip_read_coders_info (struct rspamd_task *task, t = *p; SZ_SKIP_BYTES(1); - msg_debug_archive ("7zip: read coders info %xc", t); + msg_debug_archive("7zip: read coders info %xc", t); switch (t) { case kFolder: - SZ_READ_VINT (num_folders); - msg_debug_archive ("7zip: nfolders=%L", num_folders); + SZ_READ_VINT(num_folders); + msg_debug_archive("7zip: nfolders=%L", num_folders); if (*p != 0) { /* External folders */ 
SZ_SKIP_BYTES(1); - SZ_READ_VINT (tmp); + SZ_READ_VINT(tmp); } else { SZ_SKIP_BYTES(1); @@ -1231,14 +1241,14 @@ rspamd_7zip_read_coders_info (struct rspamd_task *task, } if (folder_nstreams) { - g_free (folder_nstreams); + g_free(folder_nstreams); } - folder_nstreams = g_malloc (sizeof (int) * num_folders); + folder_nstreams = g_malloc(sizeof(int) * num_folders); for (i = 0; i < num_folders && p != NULL && p < end; i++) { - p = rspamd_7zip_read_folder (task, p, end, arch, - &folder_nstreams[i], &num_digests); + p = rspamd_7zip_read_folder(task, p, end, arch, + &folder_nstreams[i], &num_digests); } } break; @@ -1246,14 +1256,14 @@ rspamd_7zip_read_coders_info (struct rspamd_task *task, for (i = 0; i < num_folders && p != NULL && p < end; i++) { if (folder_nstreams) { for (guint j = 0; j < folder_nstreams[i]; j++) { - SZ_READ_VINT (tmp); /* Unpacked size */ - msg_debug_archive ("7zip: unpacked size " - "(folder=%d, stream=%d) = %L", - (gint)i, j, tmp); + SZ_READ_VINT(tmp); /* Unpacked size */ + msg_debug_archive("7zip: unpacked size " + "(folder=%d, stream=%d) = %L", + (gint) i, j, tmp); } } else { - msg_err_task ("internal 7zip error"); + msg_err_task("internal 7zip error"); } } break; @@ -1271,15 +1281,15 @@ rspamd_7zip_read_coders_info (struct rspamd_task *task, * * I hope there *WAS* some reason to do such shit... 
*/ - p = rspamd_7zip_read_digest (task, p, end, arch, num_digests, - &digests_read); + p = rspamd_7zip_read_digest(task, p, end, arch, num_digests, + &digests_read); break; case kEnd: goto end; break; default: p = NULL; - msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC); + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); goto end; break; } @@ -1295,17 +1305,17 @@ end: } if (folder_nstreams) { - g_free (folder_nstreams); + g_free(folder_nstreams); } return p; } static const guchar * -rspamd_7zip_read_substreams_info (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch, - guint num_folders, guint num_nodigest) +rspamd_7zip_read_substreams_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, + guint num_folders, guint num_nodigest) { guchar t; guint i; @@ -1316,8 +1326,8 @@ rspamd_7zip_read_substreams_info (struct rspamd_task *task, return NULL; } - folder_nstreams = g_alloca (sizeof (guint64) * num_folders); - memset (folder_nstreams, 0, sizeof (guint64) * num_folders); + folder_nstreams = g_alloca(sizeof(guint64) * num_folders); + memset(folder_nstreams, 0, sizeof(guint64) * num_folders); while (p != NULL && p < end) { /* @@ -1341,14 +1351,14 @@ rspamd_7zip_read_substreams_info (struct rspamd_task *task, t = *p; SZ_SKIP_BYTES(1); - msg_debug_archive ("7zip: read substream info %xc", t); + msg_debug_archive("7zip: read substream info %xc", t); switch (t) { case kNumUnPackStream: - for (i = 0; i < num_folders; i ++) { + for (i = 0; i < num_folders; i++) { guint64 tmp; - SZ_READ_VINT (tmp); + SZ_READ_VINT(tmp); folder_nstreams[i] = tmp; } break; @@ -1356,8 +1366,8 @@ rspamd_7zip_read_substreams_info (struct rspamd_task *task, /* * Read the comment in the rspamd_7zip_read_coders_info */ - p = rspamd_7zip_read_digest (task, p, end, arch, num_nodigest, - NULL); + p = rspamd_7zip_read_digest(task, p, end, arch, num_nodigest, + NULL); break; case kSize: /* @@ -1365,11 
+1375,11 @@ rspamd_7zip_read_substreams_info (struct rspamd_task *task, * as there are no ways to proceed without it. * In fact, it is just absent in the real life... */ - for (i = 0; i < num_folders; i ++) { + for (i = 0; i < num_folders; i++) { for (guint j = 0; j < folder_nstreams[i]; j++) { guint64 tmp; - SZ_READ_VINT (tmp); /* Who cares indeed */ + SZ_READ_VINT(tmp); /* Who cares indeed */ } } break; @@ -1378,7 +1388,7 @@ rspamd_7zip_read_substreams_info (struct rspamd_task *task, break; default: p = NULL; - msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC); + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); goto end; break; } @@ -1389,9 +1399,9 @@ end: } static const guchar * -rspamd_7zip_read_main_streams_info (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch) +rspamd_7zip_read_main_streams_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) { guchar t; guint num_folders = 0, unknown_digests = 0; @@ -1399,7 +1409,7 @@ rspamd_7zip_read_main_streams_info (struct rspamd_task *task, while (p != NULL && p < end) { t = *p; SZ_SKIP_BYTES(1); - msg_debug_archive ("7zip: read main streams info %xc", t); + msg_debug_archive("7zip: read main streams info %xc", t); /* * @@ -1419,15 +1429,15 @@ rspamd_7zip_read_main_streams_info (struct rspamd_task *task, */ switch (t) { case kPackInfo: - p = rspamd_7zip_read_pack_info (task, p, end, arch); + p = rspamd_7zip_read_pack_info(task, p, end, arch); break; case kUnPackInfo: - p = rspamd_7zip_read_coders_info (task, p, end, arch, &num_folders, - &unknown_digests); + p = rspamd_7zip_read_coders_info(task, p, end, arch, &num_folders, + &unknown_digests); break; case kSubStreamsInfo: - p = rspamd_7zip_read_substreams_info (task, p, end, arch, num_folders, - unknown_digests); + p = rspamd_7zip_read_substreams_info(task, p, end, arch, num_folders, + unknown_digests); break; break; case kEnd: @@ -1435,7 +1445,7 @@ 
rspamd_7zip_read_main_streams_info (struct rspamd_task *task, break; default: p = NULL; - msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC); + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); goto end; break; } @@ -1446,9 +1456,9 @@ end: } static const guchar * -rspamd_7zip_read_archive_props (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch) +rspamd_7zip_read_archive_props(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) { guchar proptype; guint64 proplen; @@ -1487,36 +1497,36 @@ rspamd_7zip_read_archive_props (struct rspamd_task *task, } static GString * -rspamd_7zip_ucs2_to_utf8 (struct rspamd_task *task, const guchar *p, - const guchar *end) +rspamd_7zip_ucs2_to_utf8(struct rspamd_task *task, const guchar *p, + const guchar *end) { GString *res; goffset dest_pos = 0, src_pos = 0; - const gsize len = (end - p) / sizeof (guint16); + const gsize len = (end - p) / sizeof(guint16); guint16 *up; UChar32 wc; UBool is_error = 0; - res = g_string_sized_new ((end - p) * 3 / 2 + sizeof (wc) + 1); - up = (guint16 *)p; + res = g_string_sized_new((end - p) * 3 / 2 + sizeof(wc) + 1); + up = (guint16 *) p; while (src_pos < len) { - U16_NEXT (up, src_pos, len, wc); + U16_NEXT(up, src_pos, len, wc); if (wc > 0) { - U8_APPEND (res->str, dest_pos, - res->allocated_len - 1, - wc, is_error); + U8_APPEND(res->str, dest_pos, + res->allocated_len - 1, + wc, is_error); } if (is_error) { - g_string_free (res, TRUE); + g_string_free(res, TRUE); return NULL; } } - g_assert (dest_pos < res->allocated_len); + g_assert(dest_pos < res->allocated_len); res->len = dest_pos; res->str[dest_pos] = '\0'; @@ -1525,46 +1535,46 @@ rspamd_7zip_ucs2_to_utf8 (struct rspamd_task *task, const guchar *p, } static const guchar * -rspamd_7zip_read_files_info (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch) +rspamd_7zip_read_files_info(struct rspamd_task 
*task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) { guint64 nfiles = 0, sz, i; guchar t, b; struct rspamd_archive_file *fentry; - SZ_READ_VINT (nfiles); + SZ_READ_VINT(nfiles); - for (;p != NULL && p < end;) { + for (; p != NULL && p < end;) { t = *p; - SZ_SKIP_BYTES (1); + SZ_SKIP_BYTES(1); - msg_debug_archive ("7zip: read file data type %xc", t); + msg_debug_archive("7zip: read file data type %xc", t); if (t == kEnd) { goto end; } /* This is SO SPECIAL, gah */ - SZ_READ_VINT (sz); + SZ_READ_VINT(sz); switch (t) { case kEmptyStream: case kEmptyFile: case kAnti: /* AntiFile, OMFG */ - /* We don't care about these bits */ + /* We don't care about these bits */ case kCTime: case kATime: case kMTime: /* We don't care of these guys, but we still have to parse them, gah */ if (sz > 0) { - SZ_SKIP_BYTES (sz); + SZ_SKIP_BYTES(sz); } break; case kName: /* The most useful part in this whole bloody format */ b = *p; /* External flag */ - SZ_SKIP_BYTES (1); + SZ_SKIP_BYTES(1); if (b) { /* TODO: for the god sake, do something about external @@ -1572,10 +1582,10 @@ rspamd_7zip_read_files_info (struct rspamd_task *task, */ guint64 tmp; - SZ_READ_VINT (tmp); + SZ_READ_VINT(tmp); } else { - for (i = 0; i < nfiles; i ++) { + for (i = 0; i < nfiles; i++) { /* Zero terminated wchar_t: happy converting... 
*/ /* First, find terminator */ const guchar *fend = NULL, *tp = p; @@ -1592,20 +1602,20 @@ rspamd_7zip_read_files_info (struct rspamd_task *task, if (fend == NULL || fend - p == 0) { /* Crap instead of fname */ - msg_debug_archive ("bad 7zip name; %s", G_STRLOC); + msg_debug_archive("bad 7zip name; %s", G_STRLOC); goto end; } - res = rspamd_7zip_ucs2_to_utf8 (task, p, fend); + res = rspamd_7zip_ucs2_to_utf8(task, p, fend); if (res != NULL) { - fentry = g_malloc0 (sizeof (*fentry)); + fentry = g_malloc0(sizeof(*fentry)); fentry->fname = res; - g_ptr_array_add (arch->files, fentry); - msg_debug_archive ("7zip: found file %v", res); + g_ptr_array_add(arch->files, fentry); + msg_debug_archive("7zip: found file %v", res); } else { - msg_debug_archive ("bad 7zip name; %s", G_STRLOC); + msg_debug_archive("bad 7zip name; %s", G_STRLOC); } /* Skip zero terminating character */ p = fend + 2; @@ -1615,12 +1625,12 @@ rspamd_7zip_read_files_info (struct rspamd_task *task, case kDummy: case kWinAttributes: if (sz > 0) { - SZ_SKIP_BYTES (sz); + SZ_SKIP_BYTES(sz); } break; default: p = NULL; - msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC); + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); goto end; break; } @@ -1631,15 +1641,15 @@ end: } static const guchar * -rspamd_7zip_read_next_section (struct rspamd_task *task, - const guchar *p, const guchar *end, - struct rspamd_archive *arch) +rspamd_7zip_read_next_section(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) { guchar t = *p; SZ_SKIP_BYTES(1); - msg_debug_archive ("7zip: read section %xc", t); + msg_debug_archive("7zip: read section %xc", t); switch (t) { case kHeader: @@ -1650,29 +1660,29 @@ rspamd_7zip_read_next_section (struct rspamd_task *task, * In fact, headers are just packed, but we assume it as * encrypted to distinguish from the normal archives */ - msg_debug_archive ("7zip: encoded header, needs to be uncompressed"); + msg_debug_archive("7zip: 
encoded header, needs to be uncompressed"); arch->flags |= RSPAMD_ARCHIVE_CANNOT_READ; p = NULL; /* Cannot get anything useful */ break; case kArchiveProperties: - p = rspamd_7zip_read_archive_props (task, p, end, arch); + p = rspamd_7zip_read_archive_props(task, p, end, arch); break; case kMainStreamsInfo: - p = rspamd_7zip_read_main_streams_info (task, p, end, arch); + p = rspamd_7zip_read_main_streams_info(task, p, end, arch); break; case kAdditionalStreamsInfo: - p = rspamd_7zip_read_main_streams_info (task, p, end, arch); + p = rspamd_7zip_read_main_streams_info(task, p, end, arch); break; case kFilesInfo: - p = rspamd_7zip_read_files_info (task, p, end, arch); + p = rspamd_7zip_read_files_info(task, p, end, arch); break; case kEnd: p = NULL; - msg_debug_archive ("7zip: read final section"); + msg_debug_archive("7zip: read final section"); break; default: p = NULL; - msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC); + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); break; } @@ -1680,8 +1690,8 @@ rspamd_7zip_read_next_section (struct rspamd_task *task, } static void -rspamd_archive_process_7zip (struct rspamd_task *task, - struct rspamd_mime_part *part) +rspamd_archive_process_7zip(struct rspamd_task *task, + struct rspamd_mime_part *part) { struct rspamd_archive *arch; const guchar *start, *p, *end; @@ -1692,30 +1702,30 @@ rspamd_archive_process_7zip (struct rspamd_task *task, p = start; end = p + part->parsed_data.len; - if (end - p <= sizeof (guint64) + sizeof (guint32) || - memcmp (p, sz_magic, sizeof (sz_magic)) != 0) { - msg_debug_archive ("7z archive is invalid (no 7z magic)"); + if (end - p <= sizeof(guint64) + sizeof(guint32) || + memcmp(p, sz_magic, sizeof(sz_magic)) != 0) { + msg_debug_archive("7z archive is invalid (no 7z magic)"); return; } - arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch)); - arch->files = g_ptr_array_new (); + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = 
g_ptr_array_new(); arch->type = RSPAMD_ARCHIVE_7ZIP; - rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, - arch); + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */ - p += sizeof (guint64) + sizeof (guint32); + p += sizeof(guint64) + sizeof(guint32); SZ_READ_UINT64(section_offset); SZ_READ_UINT64(section_length); - if (end - p > sizeof (guint32)) { - p += sizeof (guint32); + if (end - p > sizeof(guint32)) { + p += sizeof(guint32); } else { - msg_debug_archive ("7z archive is invalid (truncated crc)"); + msg_debug_archive("7z archive is invalid (truncated crc)"); return; } @@ -1724,12 +1734,13 @@ rspamd_archive_process_7zip (struct rspamd_task *task, p += section_offset; } else { - msg_debug_archive ("7z archive is invalid (incorrect section offset)"); + msg_debug_archive("7z archive is invalid (incorrect section offset)"); return; } - while ((p = rspamd_7zip_read_next_section (task, p, end, arch)) != NULL); + while ((p = rspamd_7zip_read_next_section(task, p, end, arch)) != NULL) + ; part->part_type = RSPAMD_MIME_PART_ARCHIVE; part->specific.arch = arch; @@ -1740,8 +1751,9 @@ rspamd_archive_process_7zip (struct rspamd_task *task, } static void -rspamd_archive_process_gzip (struct rspamd_task *task, - struct rspamd_mime_part *part) { +rspamd_archive_process_gzip(struct rspamd_task *task, + struct rspamd_mime_part *part) +{ struct rspamd_archive *arch; const guchar *start, *p, *end; const guchar gz_magic[] = {0x1F, 0x8B}; @@ -1751,20 +1763,20 @@ rspamd_archive_process_gzip (struct rspamd_task *task, p = start; end = p + part->parsed_data.len; - if (end - p <= 10 || memcmp (p, gz_magic, sizeof (gz_magic)) != 0) { - msg_debug_archive ("gzip archive is invalid (no gzip magic)"); + if (end - p <= 10 || memcmp(p, gz_magic, sizeof(gz_magic)) != 0) { + msg_debug_archive("gzip archive is invalid (no gzip magic)"); return; } - arch = rspamd_mempool_alloc0 
(task->task_pool, sizeof (*arch)); - arch->files = g_ptr_array_sized_new (1); + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_sized_new(1); arch->type = RSPAMD_ARCHIVE_GZIP; if (part->cd) { arch->archive_name = &part->cd->filename; } - rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor, - arch); + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); flags = p[3]; @@ -1786,11 +1798,11 @@ rspamd_archive_process_gzip (struct rspamd_task *task, /* Optional section */ guint16 optlen = 0; - RAR_READ_UINT16 (optlen); + RAR_READ_UINT16(optlen); if (end <= p + optlen) { - msg_debug_archive ("gzip archive is invalid, bad extra length: %d", - (int)optlen); + msg_debug_archive("gzip archive is invalid, bad extra length: %d", + (int) optlen); return; } @@ -1806,13 +1818,13 @@ rspamd_archive_process_gzip (struct rspamd_task *task, if (p > fname_start) { struct rspamd_archive_file *f; - f = g_malloc0 (sizeof (*f)); + f = g_malloc0(sizeof(*f)); - rspamd_archive_file_try_utf (task, arch, f, - fname_start, p - fname_start); + rspamd_archive_file_try_utf(task, arch, f, + fname_start, p - fname_start); if (f->fname) { - g_ptr_array_add (arch->files, f); + g_ptr_array_add(arch->files, f); if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; @@ -1820,19 +1832,19 @@ rspamd_archive_process_gzip (struct rspamd_task *task, } else { /* Invalid filename, skip */ - g_free (f); + g_free(f); } goto set; } } - p ++; + p++; } /* Wrong filename, not zero terminated */ - msg_debug_archive ("gzip archive is invalid, bad filename at pos %d", - (int)(p - start)); + msg_debug_archive("gzip archive is invalid, bad filename at pos %d", + (int) (p - start)); return; } @@ -1841,50 +1853,50 @@ rspamd_archive_process_gzip (struct rspamd_task *task, if (part->cd && part->cd->filename.len > 0) { const gchar *dot_pos, *slash_pos; - dot_pos = rspamd_memrchr 
(part->cd->filename.begin, '.', - part->cd->filename.len); + dot_pos = rspamd_memrchr(part->cd->filename.begin, '.', + part->cd->filename.len); if (dot_pos) { struct rspamd_archive_file *f; - slash_pos = rspamd_memrchr (part->cd->filename.begin, '/', - part->cd->filename.len); + slash_pos = rspamd_memrchr(part->cd->filename.begin, '/', + part->cd->filename.len); if (slash_pos && slash_pos < dot_pos) { - f = g_malloc0 (sizeof (*f)); - f->fname = g_string_sized_new (dot_pos - slash_pos); - g_string_append_len (f->fname, slash_pos + 1, - dot_pos - slash_pos - 1); + f = g_malloc0(sizeof(*f)); + f->fname = g_string_sized_new(dot_pos - slash_pos); + g_string_append_len(f->fname, slash_pos + 1, + dot_pos - slash_pos - 1); - msg_debug_archive ("fallback to gzip filename based on cd: %v", - f->fname); + msg_debug_archive("fallback to gzip filename based on cd: %v", + f->fname); - g_ptr_array_add (arch->files, f); + g_ptr_array_add(arch->files, f); goto set; } else { const gchar *fname_start = part->cd->filename.begin; - f = g_malloc0 (sizeof (*f)); + f = g_malloc0(sizeof(*f)); - if (memchr (fname_start, '.', part->cd->filename.len) != dot_pos) { + if (memchr(fname_start, '.', part->cd->filename.len) != dot_pos) { /* Double dots, something like foo.exe.gz */ - f->fname = g_string_sized_new (dot_pos - fname_start); - g_string_append_len (f->fname, fname_start, - dot_pos - fname_start); + f->fname = g_string_sized_new(dot_pos - fname_start); + g_string_append_len(f->fname, fname_start, + dot_pos - fname_start); } else { /* Single dot, something like foo.gzz */ - f->fname = g_string_sized_new (part->cd->filename.len); - g_string_append_len (f->fname, fname_start, - part->cd->filename.len); + f->fname = g_string_sized_new(part->cd->filename.len); + g_string_append_len(f->fname, fname_start, + part->cd->filename.len); } - msg_debug_archive ("fallback to gzip filename based on cd: %v", - f->fname); + msg_debug_archive("fallback to gzip filename based on cd: %v", + f->fname); - 
g_ptr_array_add (arch->files, f); + g_ptr_array_add(arch->files, f); goto set; } @@ -1901,25 +1913,24 @@ set: } static gboolean -rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str, - const guchar *magic_start, gsize magic_len) +rspamd_archive_cheat_detect(struct rspamd_mime_part *part, const gchar *str, + const guchar *magic_start, gsize magic_len) { struct rspamd_content_type *ct; const gchar *p; rspamd_ftok_t srch, *fname; ct = part->ct; - RSPAMD_FTOK_ASSIGN (&srch, "application"); + RSPAMD_FTOK_ASSIGN(&srch, "application"); - if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp (&ct->type, - &srch) == 0) { - if (rspamd_substring_search_caseless (ct->subtype.begin, ct->subtype.len, - str, strlen (str)) != -1) { + if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp(&ct->type, &srch) == 0) { + if (rspamd_substring_search_caseless(ct->subtype.begin, ct->subtype.len, + str, strlen(str)) != -1) { /* We still need to check magic, see #1848 */ if (magic_start != NULL) { if (part->parsed_data.len > magic_len && - memcmp (part->parsed_data.begin, - magic_start, magic_len) == 0) { + memcmp(part->parsed_data.begin, + magic_start, magic_len) == 0) { return TRUE; } /* No magic, refuse this type of archive */ @@ -1934,15 +1945,15 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str, if (part->cd) { fname = &part->cd->filename; - if (fname && fname->len > strlen (str)) { - p = fname->begin + fname->len - strlen (str); + if (fname && fname->len > strlen(str)) { + p = fname->begin + fname->len - strlen(str); - if (rspamd_lc_cmp (p, str, strlen (str)) == 0) { + if (rspamd_lc_cmp(p, str, strlen(str)) == 0) { if (*(p - 1) == '.') { if (magic_start != NULL) { if (part->parsed_data.len > magic_len && - memcmp (part->parsed_data.begin, - magic_start, magic_len) == 0) { + memcmp(part->parsed_data.begin, + magic_start, magic_len) == 0) { return TRUE; } /* No magic, refuse this type of archive */ @@ -1956,7 
+1967,7 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str, if (magic_start != NULL) { if (part->parsed_data.len > magic_len && - memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) { + memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) { return TRUE; } } @@ -1964,7 +1975,7 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str, else { if (magic_start != NULL) { if (part->parsed_data.len > magic_len && - memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) { + memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) { return TRUE; } } @@ -1973,8 +1984,7 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str, return FALSE; } -void -rspamd_archives_process (struct rspamd_task *task) +void rspamd_archives_process(struct rspamd_task *task) { guint i; struct rspamd_mime_part *part; @@ -1983,34 +1993,35 @@ rspamd_archives_process (struct rspamd_task *task) const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; const guchar gz_magic[] = {0x1F, 0x8B, 0x08}; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) { if (part->parsed_data.len > 0) { - if (rspamd_archive_cheat_detect (part, "zip", - zip_magic, sizeof (zip_magic))) { - rspamd_archive_process_zip (task, part); + if (rspamd_archive_cheat_detect(part, "zip", + zip_magic, sizeof(zip_magic))) { + rspamd_archive_process_zip(task, part); } - else if (rspamd_archive_cheat_detect (part, "rar", - rar_magic, sizeof (rar_magic))) { - rspamd_archive_process_rar (task, part); + else if (rspamd_archive_cheat_detect(part, "rar", + rar_magic, sizeof(rar_magic))) { + rspamd_archive_process_rar(task, part); } - else if (rspamd_archive_cheat_detect (part, "7z", - sz_magic, sizeof (sz_magic))) { - rspamd_archive_process_7zip (task, part); + else if (rspamd_archive_cheat_detect(part, "7z", + 
sz_magic, sizeof(sz_magic))) { + rspamd_archive_process_7zip(task, part); } - else if (rspamd_archive_cheat_detect (part, "gz", - gz_magic, sizeof (gz_magic))) { - rspamd_archive_process_gzip (task, part); + else if (rspamd_archive_cheat_detect(part, "gz", + gz_magic, sizeof(gz_magic))) { + rspamd_archive_process_gzip(task, part); } if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) && - part->part_type == RSPAMD_MIME_PART_ARCHIVE && - part->specific.arch) { + part->part_type == RSPAMD_MIME_PART_ARCHIVE && + part->specific.arch) { struct rspamd_archive *arch = part->specific.arch; - msg_info_task ("found %s archive with incorrect content-type: %T/%T", - rspamd_archive_type_str (arch->type), - &part->ct->type, &part->ct->subtype); + msg_info_task("found %s archive with incorrect content-type: %T/%T", + rspamd_archive_type_str(arch->type), + &part->ct->type, &part->ct->subtype); if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) { part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; @@ -2023,7 +2034,7 @@ rspamd_archives_process (struct rspamd_task *task) const gchar * -rspamd_archive_type_str (enum rspamd_archive_type type) +rspamd_archive_type_str(enum rspamd_archive_type type) { const gchar *ret = "unknown"; diff --git a/src/libmime/archives.h b/src/libmime/archives.h index e0ac7d963..56beb6227 100644 --- a/src/libmime/archives.h +++ b/src/libmime/archives.h @@ -18,7 +18,7 @@ #include "config.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -58,14 +58,14 @@ struct rspamd_archive { /** * Process archives from a worker task */ -void rspamd_archives_process (struct rspamd_task *task); +void rspamd_archives_process(struct rspamd_task *task); /** * Get textual representation of an archive's type */ -const gchar *rspamd_archive_type_str (enum rspamd_archive_type type); +const gchar *rspamd_archive_type_str(enum rspamd_archive_type type); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/content_type.c 
b/src/libmime/content_type.c index 40f0fd351..765cb8799 100644 --- a/src/libmime/content_type.c +++ b/src/libmime/content_type.c @@ -21,18 +21,18 @@ #include "libmime/mime_encoding.h" static gboolean -rspamd_rfc2231_decode (rspamd_mempool_t *pool, - struct rspamd_content_type_param *param, - gchar *value_start, gchar *value_end) +rspamd_rfc2231_decode(rspamd_mempool_t *pool, + struct rspamd_content_type_param *param, + gchar *value_start, gchar *value_end) { gchar *quote_pos; - quote_pos = memchr (value_start, '\'', value_end - value_start); + quote_pos = memchr(value_start, '\'', value_end - value_start); if (quote_pos == NULL) { /* Plain percent encoding */ - gsize r = rspamd_url_decode (value_start, value_start, - value_end - value_start); + gsize r = rspamd_url_decode(value_start, value_start, + value_end - value_start); param->value.begin = value_start; param->value.len = r; } @@ -49,13 +49,13 @@ rspamd_rfc2231_decode (rspamd_mempool_t *pool, ctok.len = quote_pos - value_start; if (ctok.len > 0) { - charset = rspamd_mime_detect_charset (&ctok, pool); + charset = rspamd_mime_detect_charset(&ctok, pool); } /* Now, we can check for either next quote sign or, eh, ignore that */ value_start = quote_pos + 1; - quote_pos = memchr (value_start, '\'', value_end - value_start); + quote_pos = memchr(value_start, '\'', value_end - value_start); if (quote_pos) { /* Ignore language */ @@ -63,31 +63,31 @@ rspamd_rfc2231_decode (rspamd_mempool_t *pool, } /* Perform percent decoding */ - gsize r = rspamd_url_decode (value_start, value_start, - value_end - value_start); + gsize r = rspamd_url_decode(value_start, value_start, + value_end - value_start); GError *err = NULL; if (charset == NULL) { /* Try heuristic */ - charset = rspamd_mime_charset_find_by_content (value_start, r, TRUE); + charset = rspamd_mime_charset_find_by_content(value_start, r, TRUE); } if (charset == NULL) { - msg_warn_pool ("cannot convert parameter from charset %T", &ctok); + msg_warn_pool("cannot convert 
parameter from charset %T", &ctok); return FALSE; } - param->value.begin = rspamd_mime_text_to_utf8 (pool, - value_start, r, - charset, ¶m->value.len, &err); + param->value.begin = rspamd_mime_text_to_utf8(pool, + value_start, r, + charset, ¶m->value.len, &err); if (param->value.begin == NULL) { - msg_warn_pool ("cannot convert parameter from charset %s: %e", - charset, err); + msg_warn_pool("cannot convert parameter from charset %s: %e", + charset, err); if (err) { - g_error_free (err); + g_error_free(err); } return FALSE; @@ -100,14 +100,14 @@ rspamd_rfc2231_decode (rspamd_mempool_t *pool, } static gboolean -rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool, - struct rspamd_content_type_param *param, - gchar *name_start, gchar *name_end, - gchar *value_start, gchar *value_end) +rspamd_param_maybe_rfc2231_process(rspamd_mempool_t *pool, + struct rspamd_content_type_param *param, + gchar *name_start, gchar *name_end, + gchar *value_start, gchar *value_end) { const gchar *star_pos; - star_pos = memchr (name_start, '*', name_end - name_start); + star_pos = memchr(name_start, '*', name_end - name_start); if (star_pos == NULL) { return FALSE; @@ -121,7 +121,7 @@ rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool, if (star_pos == name_end - 1) { /* First */ - if (rspamd_rfc2231_decode (pool, param, value_start, value_end)) { + if (rspamd_rfc2231_decode(pool, param, value_start, value_end)) { param->name.begin = name_start; param->name.len = name_end - name_start - 1; } @@ -131,11 +131,11 @@ rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool, /* Check number */ gulong tmp; - if (!rspamd_strtoul (star_pos + 1, name_end - star_pos - 2, &tmp)) { + if (!rspamd_strtoul(star_pos + 1, name_end - star_pos - 2, &tmp)) { return FALSE; } - param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE|RSPAMD_CONTENT_PARAM_RFC2231; + param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE | RSPAMD_CONTENT_PARAM_RFC2231; param->rfc2231_id = tmp; param->name.begin = name_start; 
param->name.len = star_pos - name_start; @@ -148,7 +148,7 @@ rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool, /* Second case */ gulong tmp; - if (!rspamd_strtoul (star_pos + 1, name_end - star_pos - 1, &tmp)) { + if (!rspamd_strtoul(star_pos + 1, name_end - star_pos - 1, &tmp)) { return FALSE; } @@ -164,16 +164,16 @@ rspamd_param_maybe_rfc2231_process (rspamd_mempool_t *pool, } static gint32 -rspamd_cmp_pieces (struct rspamd_content_type_param *p1, struct rspamd_content_type_param *p2) +rspamd_cmp_pieces(struct rspamd_content_type_param *p1, struct rspamd_content_type_param *p2) { return p1->rfc2231_id - p2->rfc2231_id; } static void -rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool, - GHashTable *htb, - void (*proc)(rspamd_mempool_t *, struct rspamd_content_type_param *, gpointer ud), - gpointer procd) +rspamd_postprocess_ct_attributes(rspamd_mempool_t *pool, + GHashTable *htb, + void (*proc)(rspamd_mempool_t *, struct rspamd_content_type_param *, gpointer ud), + gpointer procd) { GHashTableIter it; gpointer k, v; @@ -183,10 +183,10 @@ rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool, return; } - g_hash_table_iter_init (&it, htb); + g_hash_table_iter_init(&it, htb); - while (g_hash_table_iter_next (&it, &k, &v)) { - param = (struct rspamd_content_type_param *)v; + while (g_hash_table_iter_next(&it, &k, &v)) { + param = (struct rspamd_content_type_param *) v; if (param->flags & RSPAMD_CONTENT_PARAM_PIECEWISE) { /* Reconstruct param */ @@ -194,23 +194,25 @@ rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool, gchar *ndata, *pos; sorted = param; - DL_SORT (sorted, rspamd_cmp_pieces); + DL_SORT(sorted, rspamd_cmp_pieces); - DL_FOREACH (sorted, cur) { + DL_FOREACH(sorted, cur) + { tlen += cur->value.len; } - ndata = rspamd_mempool_alloc (pool, tlen); + ndata = rspamd_mempool_alloc(pool, tlen); pos = ndata; - DL_FOREACH (sorted, cur) { - memcpy (pos, cur->value.begin, cur->value.len); + DL_FOREACH(sorted, cur) + { + memcpy(pos, 
cur->value.begin, cur->value.len); pos += cur->value.len; } if (param->flags & RSPAMD_CONTENT_PARAM_RFC2231) { - if (!rspamd_rfc2231_decode (pool, param, - ndata, pos)) { + if (!rspamd_rfc2231_decode(pool, param, + ndata, pos)) { param->flags |= RSPAMD_CONTENT_PARAM_BROKEN; param->value.begin = ndata; param->value.len = tlen; @@ -230,7 +232,7 @@ rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool, if (param->value.begin != NULL && param->value.len > 0) { param->value.begin = rspamd_mime_header_decode(pool, param->value.begin, - param->value.len, &invalid_utf); + param->value.len, &invalid_utf); param->value.len = strlen(param->value.begin); } @@ -238,38 +240,38 @@ rspamd_postprocess_ct_attributes (rspamd_mempool_t *pool, param->flags |= RSPAMD_CONTENT_PARAM_BROKEN; } - proc (pool, param, procd); + proc(pool, param, procd); } } static void -rspamd_content_type_postprocess (rspamd_mempool_t *pool, - struct rspamd_content_type_param *param, - gpointer ud) +rspamd_content_type_postprocess(rspamd_mempool_t *pool, + struct rspamd_content_type_param *param, + gpointer ud) { rspamd_ftok_t srch; struct rspamd_content_type_param *found = NULL; - struct rspamd_content_type *ct = (struct rspamd_content_type *)ud; + struct rspamd_content_type *ct = (struct rspamd_content_type *) ud; - RSPAMD_FTOK_ASSIGN (&srch, "charset"); + RSPAMD_FTOK_ASSIGN(&srch, "charset"); - if (rspamd_ftok_icase_equal (&param->name, &srch)) { + if (rspamd_ftok_icase_equal(&param->name, &srch)) { /* Adjust charset */ found = param; ct->charset.begin = param->value.begin; ct->charset.len = param->value.len; } - RSPAMD_FTOK_ASSIGN (&srch, "boundary"); + RSPAMD_FTOK_ASSIGN(&srch, "boundary"); - if (rspamd_ftok_icase_equal (&param->name, &srch)) { + if (rspamd_ftok_icase_equal(&param->name, &srch)) { found = param; gchar *lc_boundary; /* Adjust boundary */ - lc_boundary = rspamd_mempool_alloc (pool, param->value.len); - memcpy (lc_boundary, param->value.begin, param->value.len); - rspamd_str_lc (lc_boundary, 
param->value.len); + lc_boundary = rspamd_mempool_alloc(pool, param->value.len); + memcpy(lc_boundary, param->value.begin, param->value.len); + rspamd_str_lc(lc_boundary, param->value.len); ct->boundary.begin = lc_boundary; ct->boundary.len = param->value.len; /* Preserve original (case sensitive) boundary */ @@ -278,49 +280,48 @@ rspamd_content_type_postprocess (rspamd_mempool_t *pool, } if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - if (!rspamd_ftok_icase_equal (&param->name, &srch)) { + RSPAMD_FTOK_ASSIGN(&srch, "name"); + if (!rspamd_ftok_icase_equal(&param->name, &srch)) { /* Just lowercase */ - rspamd_str_lc_utf8 ((gchar *) param->value.begin, param->value.len); + rspamd_str_lc_utf8((gchar *) param->value.begin, param->value.len); } } } static void -rspamd_content_disposition_postprocess (rspamd_mempool_t *pool, - struct rspamd_content_type_param *param, - gpointer ud) +rspamd_content_disposition_postprocess(rspamd_mempool_t *pool, + struct rspamd_content_type_param *param, + gpointer ud) { rspamd_ftok_t srch; - struct rspamd_content_disposition *cd = (struct rspamd_content_disposition *)ud; + struct rspamd_content_disposition *cd = (struct rspamd_content_disposition *) ud; srch.begin = "filename"; srch.len = 8; - if (rspamd_ftok_icase_equal (&param->name, &srch)) { + if (rspamd_ftok_icase_equal(&param->name, &srch)) { /* Adjust filename */ cd->filename.begin = param->value.begin; cd->filename.len = param->value.len; } } -void -rspamd_content_type_add_param (rspamd_mempool_t *pool, - struct rspamd_content_type *ct, - gchar *name_start, gchar *name_end, - gchar *value_start, gchar *value_end) +void rspamd_content_type_add_param(rspamd_mempool_t *pool, + struct rspamd_content_type *ct, + gchar *name_start, gchar *name_end, + gchar *value_start, gchar *value_end) { struct rspamd_content_type_param *nparam; rspamd_ftok_t srch; struct rspamd_content_type_param *found = NULL; - g_assert (ct != NULL); + g_assert(ct != NULL); - nparam = rspamd_mempool_alloc0 (pool, sizeof 
(*nparam)); - rspamd_str_lc (name_start, name_end - name_start); + nparam = rspamd_mempool_alloc0(pool, sizeof(*nparam)); + rspamd_str_lc(name_start, name_end - name_start); - if (!rspamd_param_maybe_rfc2231_process (pool, nparam, name_start, - name_end, value_start, value_end)) { + if (!rspamd_param_maybe_rfc2231_process(pool, nparam, name_start, + name_end, value_start, value_end)) { nparam->name.begin = name_start; nparam->name.len = name_end - name_start; nparam->value.begin = value_start; @@ -331,23 +332,24 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool, srch.len = nparam->name.len; if (ct->attrs) { - found = g_hash_table_lookup (ct->attrs, &srch); - } else { - ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash, - rspamd_ftok_icase_equal); + found = g_hash_table_lookup(ct->attrs, &srch); + } + else { + ct->attrs = g_hash_table_new(rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal); } if (!found) { - DL_APPEND (found, nparam); - g_hash_table_insert (ct->attrs, &nparam->name, nparam); + DL_APPEND(found, nparam); + g_hash_table_insert(ct->attrs, &nparam->name, nparam); } else { - DL_APPEND (found, nparam); + DL_APPEND(found, nparam); } } static struct rspamd_content_type * -rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) +rspamd_content_type_parser(gchar *in, gsize len, rspamd_mempool_t *pool) { guint obraces = 0, ebraces = 0, qlen = 0; gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL; @@ -364,45 +366,50 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) parse_space, parse_quoted, parse_comment, - } state = parse_space, next_state = parse_type; + } state = parse_space, + next_state = parse_type; p = in; c = p; end = p + len; - memset (&val, 0, sizeof (val)); + memset(&val, 0, sizeof(val)); val.cpy = in; while (p < end) { switch (state) { case parse_type: - if (g_ascii_isspace (*p) || *p == ';') { + if (g_ascii_isspace(*p) || *p == ';') { /* We have type without subtype */ val.type.begin = 
c; val.type.len = p - c; state = parse_after_subtype; - } else if (*p == '/') { + } + else if (*p == '/') { val.type.begin = c; val.type.len = p - c; state = parse_space; next_state = parse_subtype; p++; - } else { + } + else { p++; } break; case parse_subtype: - if (g_ascii_isspace (*p) || *p == ';') { + if (g_ascii_isspace(*p) || *p == ';') { val.subtype.begin = c; val.subtype.len = p - c; state = parse_after_subtype; - } else { + } + else { p++; } break; case parse_after_subtype: - if (*p == ';' || g_ascii_isspace (*p)) { + if (*p == ';' || g_ascii_isspace(*p)) { p++; - } else if (*p == '(') { + } + else if (*p == '(') { c = p; state = parse_comment; next_state = parse_param_name; @@ -412,7 +419,8 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) pname_end = NULL; eqsign_seen = FALSE; p++; - } else { + } + else { c = p; state = parse_param_name; pname_start = NULL; @@ -427,33 +435,39 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) state = parse_param_after_name; eqsign_seen = TRUE; p++; - } else if (g_ascii_isspace (*p)) { + } + else if (g_ascii_isspace(*p)) { pname_start = c; pname_end = p; state = parse_param_after_name; - } else { + } + else { p++; } break; case parse_param_after_name: - if (g_ascii_isspace (*p)) { + if (g_ascii_isspace(*p)) { p++; - } else if (*p == '=') { + } + else if (*p == '=') { if (eqsign_seen) { /* Treat as value start */ c = p; eqsign_seen = FALSE; state = parse_param_value; p++; - } else { + } + else { eqsign_seen = TRUE; p++; } - } else { + } + else { if (eqsign_seen) { state = parse_param_value; c = p; - } else { + } + else { /* Invalid parameter without value */ c = p; state = parse_param_name; @@ -468,21 +482,22 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) c = p; state = parse_quoted; next_state = parse_param_value_after_quote; - } else if (g_ascii_isspace (*p)) { + } + else if (g_ascii_isspace(*p)) { if (pname_start && pname_end && pname_end 
> pname_start) { - rspamd_content_type_add_param (pool, &val, pname_start, - pname_end, c, p); - + rspamd_content_type_add_param(pool, &val, pname_start, + pname_end, c, p); } state = parse_space; next_state = parse_param_name; pname_start = NULL; pname_end = NULL; - } else if (*p == '(') { + } + else if (*p == '(') { if (pname_start && pname_end && pname_end > pname_start) { - rspamd_content_type_add_param (pool, &val, pname_start, - pname_end, c, p); + rspamd_content_type_add_param(pool, &val, pname_start, + pname_end, c, p); } obraces = 1; @@ -495,11 +510,11 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) } else if (*p == ';') { if (pname_start && pname_end && pname_end > pname_start) { - rspamd_content_type_add_param (pool, &val, pname_start, - pname_end, c, p); + rspamd_content_type_add_param(pool, &val, pname_start, + pname_end, c, p); } - p ++; + p++; state = parse_space; next_state = parse_param_name; pname_start = NULL; @@ -511,12 +526,12 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) break; case parse_param_value_after_quote: if (pname_start && pname_end && pname_end > pname_start) { - rspamd_content_type_add_param (pool, &val, pname_start, - pname_end, c, c + qlen); + rspamd_content_type_add_param(pool, &val, pname_start, + pname_end, c, c + qlen); } if (*p == '"') { - p ++; + p++; if (p == end) { /* Last quote: done... 
*/ @@ -525,7 +540,7 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) } if (*p == ';') { - p ++; + p++; state = parse_space; next_state = parse_param_name; pname_start = NULL; @@ -535,12 +550,13 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) } /* We should not normally be here in fact */ - if (g_ascii_isspace (*p)) { + if (g_ascii_isspace(*p)) { state = parse_space; next_state = parse_param_name; pname_start = NULL; pname_end = NULL; - } else if (*p == '(') { + } + else if (*p == '(') { obraces = 1; ebraces = 0; p++; @@ -548,7 +564,8 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) next_state = parse_param_name; pname_start = NULL; pname_end = NULL; - } else { + } + else { state = parse_param_name; pname_start = NULL; pname_end = NULL; @@ -560,13 +577,16 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) /* Quoted pair */ if (p + 1 < end) { p += 2; - } else { + } + else { p++; } - } else if (*p == '"') { + } + else if (*p == '"') { qlen = p - c; state = next_state; - } else { + } + else { p++; } break; @@ -574,31 +594,36 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) if (*p == '(') { obraces++; p++; - } else if (*p == ')') { + } + else if (*p == ')') { ebraces++; p++; if (ebraces == obraces && p < end) { - if (g_ascii_isspace (*p)) { + if (g_ascii_isspace(*p)) { state = parse_space; - } else { + } + else { c = p; state = next_state; } } - } else { + } + else { p++; } break; case parse_space: - if (g_ascii_isspace (*p)) { + if (g_ascii_isspace(*p)) { p++; - } else if (*p == '(') { + } + else if (*p == '(') { obraces = 1; ebraces = 0; p++; state = parse_comment; - } else { + } + else { c = p; state = next_state; } @@ -619,18 +644,17 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) case parse_param_value: if (pname_start && pname_end && pname_end > pname_start) { if (p > c && *(p - 1) == ';') { - p --; 
+ p--; } - rspamd_content_type_add_param (pool, &val, pname_start, - pname_end, c, p); - + rspamd_content_type_add_param(pool, &val, pname_start, + pname_end, c, p); } break; case parse_param_value_after_quote: if (pname_start && pname_end && pname_end > pname_start) { - rspamd_content_type_add_param (pool, &val, pname_start, - pname_end, c, c + qlen); + rspamd_content_type_add_param(pool, &val, pname_start, + pname_end, c, c + qlen); } break; default: @@ -640,22 +664,22 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) if (val.type.len > 0) { gchar *tmp; - res = rspamd_mempool_alloc (pool, sizeof (val)); - memcpy (res, &val, sizeof (val)); + res = rspamd_mempool_alloc(pool, sizeof(val)); + memcpy(res, &val, sizeof(val)); /* * Lowercase type and subtype as they are specified as case insensitive * in rfc2045 section 5.1 */ - tmp = rspamd_mempool_alloc (pool, val.type.len); - memcpy (tmp, val.type.begin, val.type.len); - rspamd_str_lc (tmp, val.type.len); + tmp = rspamd_mempool_alloc(pool, val.type.len); + memcpy(tmp, val.type.begin, val.type.len); + rspamd_str_lc(tmp, val.type.len); res->type.begin = tmp; if (val.subtype.len > 0) { - tmp = rspamd_mempool_alloc (pool, val.subtype.len); - memcpy (tmp, val.subtype.begin, val.subtype.len); - rspamd_str_lc (tmp, val.subtype.len); + tmp = rspamd_mempool_alloc(pool, val.subtype.len); + memcpy(tmp, val.subtype.begin, val.subtype.len); + rspamd_str_lc(tmp, val.subtype.len); res->subtype.begin = tmp; } } @@ -664,101 +688,101 @@ rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool) } struct rspamd_content_type * -rspamd_content_type_parse (const gchar *in, - gsize len, rspamd_mempool_t *pool) +rspamd_content_type_parse(const gchar *in, + gsize len, rspamd_mempool_t *pool) { struct rspamd_content_type *res = NULL; rspamd_ftok_t srch; gchar *cpy; - cpy = rspamd_mempool_alloc (pool, len + 1); - rspamd_strlcpy (cpy, in, len + 1); + cpy = rspamd_mempool_alloc(pool, len + 1); + 
rspamd_strlcpy(cpy, in, len + 1); - if ((res = rspamd_content_type_parser (cpy, len, pool)) != NULL) { + if ((res = rspamd_content_type_parser(cpy, len, pool)) != NULL) { if (res->attrs) { - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs); + rspamd_mempool_add_destructor(pool, + (rspamd_mempool_destruct_t) g_hash_table_unref, res->attrs); - rspamd_postprocess_ct_attributes (pool, res->attrs, - rspamd_content_type_postprocess, res); + rspamd_postprocess_ct_attributes(pool, res->attrs, + rspamd_content_type_postprocess, res); } /* Now do some hacks to work with broken content types */ if (res->subtype.len == 0) { res->flags |= RSPAMD_CONTENT_TYPE_BROKEN; - RSPAMD_FTOK_ASSIGN (&srch, "text"); + RSPAMD_FTOK_ASSIGN(&srch, "text"); - if (rspamd_ftok_casecmp (&res->type, &srch) == 0) { + if (rspamd_ftok_casecmp(&res->type, &srch) == 0) { /* Workaround for Content-Type: text */ /* Assume text/plain */ - RSPAMD_FTOK_ASSIGN (&srch, "plain"); + RSPAMD_FTOK_ASSIGN(&srch, "plain"); } else { - RSPAMD_FTOK_ASSIGN (&srch, "html"); + RSPAMD_FTOK_ASSIGN(&srch, "html"); - if (rspamd_ftok_casecmp (&res->type, &srch) == 0) { + if (rspamd_ftok_casecmp(&res->type, &srch) == 0) { /* Workaround for Content-Type: html */ - RSPAMD_FTOK_ASSIGN (&res->type, "text"); - RSPAMD_FTOK_ASSIGN (&res->subtype, "html"); + RSPAMD_FTOK_ASSIGN(&res->type, "text"); + RSPAMD_FTOK_ASSIGN(&res->subtype, "html"); } else { - RSPAMD_FTOK_ASSIGN (&srch, "application"); + RSPAMD_FTOK_ASSIGN(&srch, "application"); - if (rspamd_ftok_casecmp (&res->type, &srch) == 0) { - RSPAMD_FTOK_ASSIGN (&res->subtype, "octet-stream"); + if (rspamd_ftok_casecmp(&res->type, &srch) == 0) { + RSPAMD_FTOK_ASSIGN(&res->subtype, "octet-stream"); } } } } else { /* Common mistake done by retards */ - RSPAMD_FTOK_ASSIGN (&srch, "alternate"); + RSPAMD_FTOK_ASSIGN(&srch, "alternate"); - if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) { + if (rspamd_ftok_casecmp(&res->subtype, &srch) == 
0) { res->flags |= RSPAMD_CONTENT_TYPE_BROKEN; - RSPAMD_FTOK_ASSIGN (&res->subtype, "alternative"); + RSPAMD_FTOK_ASSIGN(&res->subtype, "alternative"); } /* PKCS7 smime */ - RSPAMD_FTOK_ASSIGN (&srch, "pkcs7-mime"); - if (rspamd_substring_search (res->subtype.begin, res->subtype.len, - srch.begin, srch.len) != -1) { + RSPAMD_FTOK_ASSIGN(&srch, "pkcs7-mime"); + if (rspamd_substring_search(res->subtype.begin, res->subtype.len, + srch.begin, srch.len) != -1) { res->flags |= RSPAMD_CONTENT_TYPE_SMIME; } } - RSPAMD_FTOK_ASSIGN (&srch, "multipart"); + RSPAMD_FTOK_ASSIGN(&srch, "multipart"); - if (rspamd_ftok_casecmp (&res->type, &srch) == 0) { + if (rspamd_ftok_casecmp(&res->type, &srch) == 0) { res->flags |= RSPAMD_CONTENT_TYPE_MULTIPART; - RSPAMD_FTOK_ASSIGN (&srch, "encrypted"); - if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) { + RSPAMD_FTOK_ASSIGN(&srch, "encrypted"); + if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) { res->flags |= RSPAMD_CONTENT_TYPE_ENCRYPTED; } } else { - RSPAMD_FTOK_ASSIGN (&srch, "text"); + RSPAMD_FTOK_ASSIGN(&srch, "text"); - if (rspamd_ftok_casecmp (&res->type, &srch) == 0) { + if (rspamd_ftok_casecmp(&res->type, &srch) == 0) { res->flags |= RSPAMD_CONTENT_TYPE_TEXT; } else { - RSPAMD_FTOK_ASSIGN (&srch, "message"); + RSPAMD_FTOK_ASSIGN(&srch, "message"); - if (rspamd_ftok_casecmp (&res->type, &srch) == 0) { - RSPAMD_FTOK_ASSIGN (&srch, "delivery-status"); + if (rspamd_ftok_casecmp(&res->type, &srch) == 0) { + RSPAMD_FTOK_ASSIGN(&srch, "delivery-status"); - if (rspamd_ftok_casecmp (&res->subtype, &srch) == 0) { - res->flags |= RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_DSN; + if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) { + res->flags |= RSPAMD_CONTENT_TYPE_TEXT | RSPAMD_CONTENT_TYPE_DSN; } else { - RSPAMD_FTOK_ASSIGN (&srch, "notification"); + RSPAMD_FTOK_ASSIGN(&srch, "notification"); - if (rspamd_substring_search_caseless (res->subtype.begin, - res->subtype.len, srch.begin, srch.len) != -1) { - res->flags |= 
RSPAMD_CONTENT_TYPE_TEXT| - RSPAMD_CONTENT_TYPE_DSN; + if (rspamd_substring_search_caseless(res->subtype.begin, + res->subtype.len, srch.begin, srch.len) != -1) { + res->flags |= RSPAMD_CONTENT_TYPE_TEXT | + RSPAMD_CONTENT_TYPE_DSN; } else { res->flags |= RSPAMD_CONTENT_TYPE_MESSAGE; @@ -769,37 +793,36 @@ rspamd_content_type_parse (const gchar *in, } } else { - msg_warn_pool ("cannot parse content type: %*s", (gint)len, cpy); + msg_warn_pool("cannot parse content type: %*s", (gint) len, cpy); } return res; } -void -rspamd_content_disposition_add_param (rspamd_mempool_t *pool, - struct rspamd_content_disposition *cd, - const gchar *name_start, const gchar *name_end, - const gchar *value_start, const gchar *value_end) +void rspamd_content_disposition_add_param(rspamd_mempool_t *pool, + struct rspamd_content_disposition *cd, + const gchar *name_start, const gchar *name_end, + const gchar *value_start, const gchar *value_end) { rspamd_ftok_t srch; gchar *name_cpy, *value_cpy, *name_cpy_end, *value_cpy_end; struct rspamd_content_type_param *found = NULL, *nparam; - g_assert (cd != NULL); + g_assert(cd != NULL); - name_cpy = rspamd_mempool_alloc (pool, name_end - name_start); - memcpy (name_cpy, name_start, name_end - name_start); + name_cpy = rspamd_mempool_alloc(pool, name_end - name_start); + memcpy(name_cpy, name_start, name_end - name_start); name_cpy_end = name_cpy + (name_end - name_start); - value_cpy = rspamd_mempool_alloc (pool, value_end - value_start); - memcpy (value_cpy, value_start, value_end - value_start); + value_cpy = rspamd_mempool_alloc(pool, value_end - value_start); + memcpy(value_cpy, value_start, value_end - value_start); value_cpy_end = value_cpy + (value_end - value_start); - nparam = rspamd_mempool_alloc0 (pool, sizeof (*nparam)); - rspamd_str_lc (name_cpy, name_cpy_end - name_cpy); + nparam = rspamd_mempool_alloc0(pool, sizeof(*nparam)); + rspamd_str_lc(name_cpy, name_cpy_end - name_cpy); - if (!rspamd_param_maybe_rfc2231_process (pool, 
nparam, name_cpy, - name_cpy_end, value_cpy, value_cpy_end)) { + if (!rspamd_param_maybe_rfc2231_process(pool, nparam, name_cpy, + name_cpy_end, value_cpy, value_cpy_end)) { nparam->name.begin = name_cpy; nparam->name.len = name_cpy_end - name_cpy; nparam->value.begin = value_cpy; @@ -810,50 +833,51 @@ rspamd_content_disposition_add_param (rspamd_mempool_t *pool, srch.len = nparam->name.len; if (cd->attrs) { - found = g_hash_table_lookup (cd->attrs, &srch); - } else { - cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash, - rspamd_ftok_icase_equal); + found = g_hash_table_lookup(cd->attrs, &srch); + } + else { + cd->attrs = g_hash_table_new(rspamd_ftok_icase_hash, + rspamd_ftok_icase_equal); } if (!found) { - DL_APPEND (found, nparam); - g_hash_table_insert (cd->attrs, &nparam->name, nparam); + DL_APPEND(found, nparam); + g_hash_table_insert(cd->attrs, &nparam->name, nparam); } else { - DL_APPEND (found, nparam); + DL_APPEND(found, nparam); } } struct rspamd_content_disposition * -rspamd_content_disposition_parse (const gchar *in, - gsize len, rspamd_mempool_t *pool) +rspamd_content_disposition_parse(const gchar *in, + gsize len, rspamd_mempool_t *pool) { struct rspamd_content_disposition *res = NULL, val; - if (rspamd_content_disposition_parser (in, len, &val, pool)) { + if (rspamd_content_disposition_parser(in, len, &val, pool)) { if (val.type == RSPAMD_CT_UNKNOWN) { /* 'Fix' type to attachment as MUA does */ val.type = RSPAMD_CT_ATTACHMENT; } - res = rspamd_mempool_alloc (pool, sizeof (val)); - memcpy (res, &val, sizeof (val)); - res->lc_data = rspamd_mempool_alloc (pool, len + 1); - rspamd_strlcpy (res->lc_data, in, len + 1); - rspamd_str_lc (res->lc_data, len); + res = rspamd_mempool_alloc(pool, sizeof(val)); + memcpy(res, &val, sizeof(val)); + res->lc_data = rspamd_mempool_alloc(pool, len + 1); + rspamd_strlcpy(res->lc_data, in, len + 1); + rspamd_str_lc(res->lc_data, len); if (res->attrs) { - rspamd_postprocess_ct_attributes (pool, res->attrs, - 
rspamd_content_disposition_postprocess, res); - rspamd_mempool_add_destructor (pool, - (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs); + rspamd_postprocess_ct_attributes(pool, res->attrs, + rspamd_content_disposition_postprocess, res); + rspamd_mempool_add_destructor(pool, + (rspamd_mempool_destruct_t) g_hash_table_unref, res->attrs); } } else { - msg_warn_pool ("cannot parse content disposition: %*s", - (gint)len, in); + msg_warn_pool("cannot parse content disposition: %*s", + (gint) len, in); } return res; diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h index 6bc7262e7..ac49bdc35 100644 --- a/src/libmime/content_type.h +++ b/src/libmime/content_type.h @@ -20,7 +20,7 @@ #include "libutil/fstring.h" #include "libutil/mem_pool.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -83,11 +83,10 @@ struct rspamd_content_disposition { * @param value_start (can be modified) * @param value_end */ -void -rspamd_content_type_add_param (rspamd_mempool_t *pool, - struct rspamd_content_type *ct, - gchar *name_start, gchar *name_end, - gchar *value_start, gchar *value_end); +void rspamd_content_type_add_param(rspamd_mempool_t *pool, + struct rspamd_content_type *ct, + gchar *name_start, gchar *name_end, + gchar *value_start, gchar *value_end); /** * Parse content type from the header (performs copy + lowercase) @@ -96,8 +95,8 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool, * @param pool * @return */ -struct rspamd_content_type *rspamd_content_type_parse (const gchar *in, - gsize len, rspamd_mempool_t *pool); +struct rspamd_content_type *rspamd_content_type_parse(const gchar *in, + gsize len, rspamd_mempool_t *pool); /** * Adds new param for content disposition header @@ -108,11 +107,10 @@ struct rspamd_content_type *rspamd_content_type_parse (const gchar *in, * @param value_start * @param value_end */ -void -rspamd_content_disposition_add_param (rspamd_mempool_t *pool, - struct rspamd_content_disposition *cd, - const gchar 
*name_start, const gchar *name_end, - const gchar *value_start, const gchar *value_end); +void rspamd_content_disposition_add_param(rspamd_mempool_t *pool, + struct rspamd_content_disposition *cd, + const gchar *name_start, const gchar *name_end, + const gchar *value_start, const gchar *value_end); /** * Parse content-disposition header @@ -121,11 +119,11 @@ rspamd_content_disposition_add_param (rspamd_mempool_t *pool, * @param pool * @return */ -struct rspamd_content_disposition *rspamd_content_disposition_parse (const gchar *in, - gsize len, - rspamd_mempool_t *pool); +struct rspamd_content_disposition *rspamd_content_disposition_parse(const gchar *in, + gsize len, + rspamd_mempool_t *pool); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c index 38a4732c5..0af7388a8 100644 --- a/src/libmime/email_addr.c +++ b/src/libmime/email_addr.c @@ -21,7 +21,7 @@ #include "smtp_parsers.h" static void -rspamd_email_address_unescape (struct rspamd_email_address *addr) +rspamd_email_address_unescape(struct rspamd_email_address *addr) { const char *h, *end; char *t, *d; @@ -30,7 +30,7 @@ rspamd_email_address_unescape (struct rspamd_email_address *addr) return; } - d = g_malloc (addr->user_len); + d = g_malloc(addr->user_len); t = d; h = addr->user; end = h + addr->user_len; @@ -39,7 +39,7 @@ rspamd_email_address_unescape (struct rspamd_email_address *addr) if (*h != '\\') { *t++ = *h; } - h ++; + h++; } addr->user = d; @@ -48,7 +48,7 @@ rspamd_email_address_unescape (struct rspamd_email_address *addr) } struct rspamd_email_address * -rspamd_email_address_from_smtp (const gchar *str, guint len) +rspamd_email_address_from_smtp(const gchar *str, guint len) { struct rspamd_email_address addr, *ret; gsize nlen; @@ -57,24 +57,24 @@ rspamd_email_address_from_smtp (const gchar *str, guint len) return NULL; } - rspamd_smtp_addr_parse (str, len, &addr); + rspamd_smtp_addr_parse(str, len, &addr); if (addr.flags & 
RSPAMD_EMAIL_ADDR_VALID) { - ret = g_malloc (sizeof (*ret)); - memcpy (ret, &addr, sizeof (addr)); + ret = g_malloc(sizeof(*ret)); + memcpy(ret, &addr, sizeof(addr)); if ((ret->flags & RSPAMD_EMAIL_ADDR_QUOTED) && ret->addr[0] == '"') { if (ret->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) { /* We also need to unquote user */ - rspamd_email_address_unescape (ret); + rspamd_email_address_unescape(ret); } /* We need to unquote addr */ nlen = ret->domain_len + ret->user_len + 2; - ret->addr = g_malloc (nlen + 1); - ret->addr_len = rspamd_snprintf ((char *)ret->addr, nlen, "%*s@%*s", - (gint)ret->user_len, ret->user, - (gint)ret->domain_len, ret->domain); + ret->addr = g_malloc(nlen + 1); + ret->addr_len = rspamd_snprintf((char *) ret->addr, nlen, "%*s@%*s", + (gint) ret->user_len, ret->user, + (gint) ret->domain_len, ret->domain); ret->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED; } @@ -84,36 +84,35 @@ rspamd_email_address_from_smtp (const gchar *str, guint len) return NULL; } -void -rspamd_email_address_free (struct rspamd_email_address *addr) +void rspamd_email_address_free(struct rspamd_email_address *addr) { if (addr) { if (addr->flags & RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED) { - g_free ((void *) addr->addr); + g_free((void *) addr->addr); } if (addr->flags & RSPAMD_EMAIL_ADDR_USER_ALLOCATED) { - g_free ((void *) addr->user); + g_free((void *) addr->user); } - g_free (addr); + g_free(addr); } } static inline void -rspamd_email_address_add (rspamd_mempool_t *pool, - GPtrArray *ar, - struct rspamd_email_address *addr, - GString *name) +rspamd_email_address_add(rspamd_mempool_t *pool, + GPtrArray *ar, + struct rspamd_email_address *addr, + GString *name) { struct rspamd_email_address *elt; guint nlen; - elt = g_malloc0 (sizeof (*elt)); - rspamd_mempool_notify_alloc (pool, sizeof (*elt)); + elt = g_malloc0(sizeof(*elt)); + rspamd_mempool_notify_alloc(pool, sizeof(*elt)); if (addr != NULL) { - memcpy (elt, addr, sizeof (*addr)); + memcpy(elt, addr, sizeof(*addr)); } else { 
elt->addr = ""; @@ -127,43 +126,43 @@ rspamd_email_address_add (rspamd_mempool_t *pool, if ((elt->flags & RSPAMD_EMAIL_ADDR_QUOTED) && elt->addr[0] == '"') { if (elt->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) { /* We also need to unquote user */ - rspamd_email_address_unescape (elt); + rspamd_email_address_unescape(elt); } /* We need to unquote addr */ nlen = elt->domain_len + elt->user_len + 2; - elt->addr = g_malloc (nlen + 1); - rspamd_mempool_notify_alloc (pool, nlen + 1); - elt->addr_len = rspamd_snprintf ((char *)elt->addr, nlen, "%*s@%*s", - (gint)elt->user_len, elt->user, - (gint)elt->domain_len, elt->domain); + elt->addr = g_malloc(nlen + 1); + rspamd_mempool_notify_alloc(pool, nlen + 1); + elt->addr_len = rspamd_snprintf((char *) elt->addr, nlen, "%*s@%*s", + (gint) elt->user_len, elt->user, + (gint) elt->domain_len, elt->domain); elt->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED; } if (name->len > 0) { - rspamd_gstring_strip (name, " \t\v"); - elt->name = rspamd_mime_header_decode (pool, name->str, name->len, NULL); + rspamd_gstring_strip(name, " \t\v"); + elt->name = rspamd_mime_header_decode(pool, name->str, name->len, NULL); } - rspamd_mempool_notify_alloc (pool, name->len); - g_ptr_array_add (ar, elt); + rspamd_mempool_notify_alloc(pool, name->len); + g_ptr_array_add(ar, elt); } /* * Tries to parse an email address that doesn't conform RFC */ static gboolean -rspamd_email_address_parse_heuristic (const char *data, size_t len, - struct rspamd_email_address *addr) +rspamd_email_address_parse_heuristic(const char *data, size_t len, + struct rspamd_email_address *addr) { const gchar *p = data, *at = NULL, *end = data + len; gboolean ret = FALSE; - memset (addr, 0, sizeof (*addr)); + memset(addr, 0, sizeof(*addr)); if (*p == '<' && len > 1) { /* Angled address */ - addr->addr_len = rspamd_memcspn (p + 1, ">", len - 1); + addr->addr_len = rspamd_memcspn(p + 1, ">", len - 1); addr->addr = p + 1; addr->raw = p; addr->raw_len = len; @@ -182,7 +181,7 @@ 
rspamd_email_address_parse_heuristic (const char *data, size_t len, } if (ret) { - at = rspamd_memrchr (p, '@', len); + at = rspamd_memrchr(p, '@', len); if (at != NULL && at + 1 < end) { addr->domain = at + 1; @@ -191,7 +190,7 @@ rspamd_email_address_parse_heuristic (const char *data, size_t len, addr->user_len = at - p; } - if (rspamd_str_has_8bit (p, len)) { + if (rspamd_str_has_8bit(p, len)) { addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT; } } @@ -200,36 +199,36 @@ rspamd_email_address_parse_heuristic (const char *data, size_t len, } static inline int -rspamd_email_address_check_and_add (const gchar *start, gsize len, - GPtrArray *res, - rspamd_mempool_t *pool, - GString *ns, - gint max_elements) +rspamd_email_address_check_and_add(const gchar *start, gsize len, + GPtrArray *res, + rspamd_mempool_t *pool, + GString *ns, + gint max_elements) { struct rspamd_email_address addr; - g_assert (res != NULL); + g_assert(res != NULL); if (max_elements > 0 && res->len >= max_elements) { - msg_info_pool_check ("reached maximum number of elements %d when adding %v", - max_elements, - ns); + msg_info_pool_check("reached maximum number of elements %d when adding %v", + max_elements, + ns); return -1; } /* The whole email is likely address */ - memset (&addr, 0, sizeof (addr)); - rspamd_smtp_addr_parse (start, len, &addr); + memset(&addr, 0, sizeof(addr)); + rspamd_smtp_addr_parse(start, len, &addr); if (addr.flags & RSPAMD_EMAIL_ADDR_VALID) { - rspamd_email_address_add (pool, res, &addr, ns); + rspamd_email_address_add(pool, res, &addr, ns); } else { /* Try heuristic */ - if (rspamd_email_address_parse_heuristic (start, - len, &addr)) { - rspamd_email_address_add (pool, res, &addr, ns); + if (rspamd_email_address_parse_heuristic(start, + len, &addr)) { + rspamd_email_address_add(pool, res, &addr, ns); return 1; } @@ -242,10 +241,10 @@ rspamd_email_address_check_and_add (const gchar *start, gsize len, } GPtrArray * -rspamd_email_address_from_mime (rspamd_mempool_t *pool, const 
gchar *hdr, - guint len, - GPtrArray *src, - gint max_elements) +rspamd_email_address_from_mime(rspamd_mempool_t *pool, const gchar *hdr, + guint len, + GPtrArray *src, + gint max_elements) { GPtrArray *res = src; gboolean seen_at = FALSE, seen_obrace = FALSE; @@ -258,23 +257,24 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, parse_quoted, parse_addr, skip_spaces - } state = parse_name, next_state = parse_name; + } state = parse_name, + next_state = parse_name; if (res == NULL) { - res = g_ptr_array_sized_new (2); - rspamd_mempool_add_destructor (pool, rspamd_email_address_list_destroy, - res); + res = g_ptr_array_sized_new(2); + rspamd_mempool_add_destructor(pool, rspamd_email_address_list_destroy, + res); } else if (max_elements > 0 && res->len >= max_elements) { - msg_info_pool_check ("reached maximum number of elements %d", max_elements); + msg_info_pool_check("reached maximum number of elements %d", max_elements); return res; } - ns = g_string_sized_new (len); - cpy = g_string_sized_new (len); + ns = g_string_sized_new(len); + cpy = g_string_sized_new(len); - rspamd_mempool_add_destructor (pool, rspamd_gstring_free_hard, cpy); + rspamd_mempool_add_destructor(pool, rspamd_gstring_free_hard, cpy); /* First, we need to remove all comments as they are terrible */ obraces = 0; @@ -284,7 +284,7 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, if (state == parse_name) { if (*p == '\\') { if (obraces == 0) { - g_string_append_c (cpy, *p); + g_string_append_c(cpy, *p); } p++; @@ -294,11 +294,11 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, state = parse_quoted; } else if (*p == '(') { - obraces ++; /* To avoid ) itself being copied */ + obraces++; /* To avoid ) itself being copied */ } else if (*p == ')') { - ebraces ++; - p ++; + ebraces++; + p++; } if (obraces == ebraces) { @@ -308,13 +308,13 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, } if (p < 
end && obraces == 0) { - g_string_append_c (cpy, *p); + g_string_append_c(cpy, *p); } } else { /* Quoted elt */ if (*p == '\\') { - g_string_append_c (cpy, *p); + g_string_append_c(cpy, *p); p++; } else { @@ -324,7 +324,7 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, } if (p < end) { - g_string_append_c (cpy, *p); + g_string_append_c(cpy, *p); } } @@ -347,15 +347,15 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, t = p - 1; - while (t > c && g_ascii_isspace (*t)) { - t --; - nspaces ++; + while (t > c && g_ascii_isspace(*t)) { + t--; + nspaces++; } - g_string_append_len (ns, c, t - c + 1); + g_string_append_len(ns, c, t - c + 1); if (nspaces > 0) { - g_string_append_c (ns, ' '); + g_string_append_c(ns, ' '); } } @@ -366,11 +366,11 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, if (p > c) { t = p - 1; - while (t > c && g_ascii_isspace (*t)) { - t --; + while (t > c && g_ascii_isspace(*t)) { + t--; } - g_string_append_len (ns, c, t - c + 1); + g_string_append_len(ns, c, t - c + 1); } c = p; @@ -384,23 +384,23 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, */ t = p - 1; - while (t > c && g_ascii_isspace (*t)) { - t --; + while (t > c && g_ascii_isspace(*t)) { + t--; } - int check = rspamd_email_address_check_and_add (c, t - c + 1, - res, pool, ns, max_elements); + int check = rspamd_email_address_check_and_add(c, t - c + 1, + res, pool, ns, max_elements); if (check == 0 && res->len == 0) { /* Insert fake address */ - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } else if (check != 1) { goto end; } /* Cleanup for the next use */ - g_string_set_size (ns, 0); + g_string_set_size(ns, 0); seen_at = FALSE; } @@ -411,24 +411,24 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, seen_at = TRUE; } - p ++; + p++; break; case parse_quoted: if (*p == '\\') { if (p > c) { - 
g_string_append_len (ns, c, p - c); + g_string_append_len(ns, c, p - c); } - p ++; + p++; c = p; } else if (*p == '"') { if (p > c) { - g_string_append_len (ns, c, p - c); + g_string_append_len(ns, c, p - c); } - if (p + 1 < end && g_ascii_isspace (p[1])) { - g_string_append_c (ns, ' '); + if (p + 1 < end && g_ascii_isspace(p[1])) { + g_string_append_c(ns, ' '); } state = skip_spaces; @@ -440,22 +440,22 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, else if (*p == '<') { seen_obrace = TRUE; } - p ++; + p++; break; case parse_addr: if (*p == '>') { - int check = rspamd_email_address_check_and_add (c, p - c + 1, - res, pool, ns, max_elements); + int check = rspamd_email_address_check_and_add(c, p - c + 1, + res, pool, ns, max_elements); if (check == 0 && res->len == 0) { /* Insert a fake address */ - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } else if (check != 1) { goto end; } /* Cleanup for the next use */ - g_string_set_size (ns, 0); + g_string_set_size(ns, 0); seen_at = FALSE; state = skip_spaces; next_state = parse_name; @@ -463,15 +463,15 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, else if (*p == '@') { seen_at = TRUE; } - p ++; + p++; break; case skip_spaces: - if (!g_ascii_isspace (*p)) { + if (!g_ascii_isspace(*p)) { c = p; state = next_state; } else { - p ++; + p++; } break; } @@ -482,42 +482,43 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, case parse_name: /* Assume the whole header as name (bad thing) */ if (p > c) { - while (p > c && g_ascii_isspace (*p)) { - p --; + while (p > c && g_ascii_isspace(*p)) { + p--; } if (p > c) { if (seen_at) { /* The whole email is likely address */ - int check = rspamd_email_address_check_and_add (c, p - c, - res, pool, ns, max_elements); + int check = rspamd_email_address_check_and_add(c, p - c, + res, pool, ns, max_elements); if (check == 0 && res->len == 0) { /* Insert a 
fake address */ - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } else if (check != 1) { goto end; } - } else { + } + else { /* No @ seen */ - g_string_append_len (ns, c, p - c); + g_string_append_len(ns, c, p - c); if (res->len == 0) { - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } } } else if (res->len == 0) { - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } } break; case parse_addr: if (p > c) { - if (rspamd_email_address_check_and_add (c, p - c, - res, pool, ns, max_elements) == 0) { + if (rspamd_email_address_check_and_add(c, p - c, + res, pool, ns, max_elements) == 0) { if (res->len == 0) { - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } } } @@ -526,12 +527,12 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, /* Unfinished quoted string or a comment */ /* If we have seen obrace + at, then we still can try to resolve address */ if (seen_at && seen_obrace) { - p = rspamd_memrchr (cpy->str, '<', cpy->len); - g_assert (p != NULL); - if (rspamd_email_address_check_and_add (p, end - p, - res, pool, ns, max_elements) == 0) { + p = rspamd_memrchr(cpy->str, '<', cpy->len); + g_assert(p != NULL); + if (rspamd_email_address_check_and_add(p, end - p, + res, pool, ns, max_elements) == 0) { if (res->len == 0) { - rspamd_email_address_add (pool, res, NULL, ns); + rspamd_email_address_add(pool, res, NULL, ns); } } } @@ -541,22 +542,22 @@ rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, break; } end: - rspamd_mempool_notify_alloc (pool, cpy->len); - g_string_free (ns, TRUE); + rspamd_mempool_notify_alloc(pool, cpy->len); + g_string_free(ns, TRUE); return res; } -void -rspamd_email_address_list_destroy (gpointer ptr) +void rspamd_email_address_list_destroy(gpointer ptr) { GPtrArray *ar = ptr; guint i; struct 
rspamd_email_address *addr; - PTR_ARRAY_FOREACH (ar, i, addr) { - rspamd_email_address_free (addr); + PTR_ARRAY_FOREACH(ar, i, addr) + { + rspamd_email_address_free(addr); } - g_ptr_array_free (ar, TRUE); + g_ptr_array_free(ar, TRUE); }
\ No newline at end of file diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h index 7e150f80d..ed00722f9 100644 --- a/src/libmime/email_addr.h +++ b/src/libmime/email_addr.h @@ -21,7 +21,7 @@ #include "libutil/ref.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -66,7 +66,7 @@ struct rspamd_task; * @param len length of string * @return */ -struct rspamd_email_address *rspamd_email_address_from_smtp (const gchar *str, guint len); +struct rspamd_email_address *rspamd_email_address_from_smtp(const gchar *str, guint len); /** * Parses email address from the mime header, decodes names and return the array @@ -78,19 +78,19 @@ struct rspamd_email_address *rspamd_email_address_from_smtp (const gchar *str, g * @return */ GPtrArray * -rspamd_email_address_from_mime (rspamd_mempool_t *pool, const gchar *hdr, guint len, - GPtrArray *src, gint max_elements); +rspamd_email_address_from_mime(rspamd_mempool_t *pool, const gchar *hdr, guint len, + GPtrArray *src, gint max_elements); /** * Destroys list of email addresses * @param ptr */ -void rspamd_email_address_list_destroy (gpointer ptr); +void rspamd_email_address_list_destroy(gpointer ptr); -void rspamd_email_address_free (struct rspamd_email_address *addr); +void rspamd_email_address_free(struct rspamd_email_address *addr); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/images.c b/src/libmime/images.c index dc34c8174..1344d913f 100644 --- a/src/libmime/images.c +++ b/src/libmime/images.c @@ -19,10 +19,10 @@ #include "message.h" #include "libserver/html/html.h" -#define msg_debug_images(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_images_log_id, "images", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) +#define msg_debug_images(...) 
rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_images_log_id, "images", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(images) @@ -43,60 +43,58 @@ static const guint8 jpg_sig_exif[] = {0xff, 0xe1}; static const guint8 gif_signature[] = {'G', 'I', 'F', '8'}; static const guint8 bmp_signature[] = {'B', 'M'}; -static bool process_image (struct rspamd_task *task, struct rspamd_mime_part *part); +static bool process_image(struct rspamd_task *task, struct rspamd_mime_part *part); -bool -rspamd_images_process_mime_part_maybe (struct rspamd_task *task, - struct rspamd_mime_part *part) +bool rspamd_images_process_mime_part_maybe(struct rspamd_task *task, + struct rspamd_mime_part *part) { if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) { if (part->detected_type && - strcmp (part->detected_type, "image") == 0 && + strcmp(part->detected_type, "image") == 0 && part->parsed_data.len > 0) { - return process_image (task, part); + return process_image(task, part); } } return false; } -void -rspamd_images_process (struct rspamd_task *task) +void rspamd_images_process(struct rspamd_task *task) { guint i; struct rspamd_mime_part *part; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { - rspamd_images_process_mime_part_maybe (task, part); + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { + rspamd_images_process_mime_part_maybe(task, part); } - } static enum rspamd_image_type -detect_image_type (rspamd_ftok_t *data) +detect_image_type(rspamd_ftok_t *data) { - if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) { - if (memcmp (data->begin, png_signature, sizeof (png_signature)) == 0) { + if (data->len > sizeof(png_signature) / sizeof(png_signature[0])) { + if (memcmp(data->begin, png_signature, sizeof(png_signature)) == 0) { return IMAGE_TYPE_PNG; } } if (data->len > 10) { - if (memcmp (data->begin, jpg_sig1, sizeof (jpg_sig1)) == 0) { - if (memcmp (data->begin + 2, jpg_sig_jfif, sizeof (jpg_sig_jfif)) == 0 || - 
memcmp (data->begin + 2, jpg_sig_exif, sizeof (jpg_sig_exif)) == 0) { + if (memcmp(data->begin, jpg_sig1, sizeof(jpg_sig1)) == 0) { + if (memcmp(data->begin + 2, jpg_sig_jfif, sizeof(jpg_sig_jfif)) == 0 || + memcmp(data->begin + 2, jpg_sig_exif, sizeof(jpg_sig_exif)) == 0) { return IMAGE_TYPE_JPG; } } } - if (data->len > sizeof (gif_signature) / sizeof (gif_signature[0])) { - if (memcmp (data->begin, gif_signature, sizeof (gif_signature)) == 0) { + if (data->len > sizeof(gif_signature) / sizeof(gif_signature[0])) { + if (memcmp(data->begin, gif_signature, sizeof(gif_signature)) == 0) { return IMAGE_TYPE_GIF; } } - if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) { - if (memcmp (data->begin, bmp_signature, sizeof (bmp_signature)) == 0) { + if (data->len > sizeof(bmp_signature) / sizeof(bmp_signature[0])) { + if (memcmp(data->begin, bmp_signature, sizeof(bmp_signature)) == 0) { return IMAGE_TYPE_BMP; } } @@ -106,47 +104,47 @@ detect_image_type (rspamd_ftok_t *data) static struct rspamd_image * -process_png_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) +process_png_image(rspamd_mempool_t *pool, rspamd_ftok_t *data) { struct rspamd_image *img; guint32 t; const guint8 *p; if (data->len < 24) { - msg_info_pool ("bad png detected (maybe striped)"); + msg_info_pool("bad png detected (maybe striped)"); return NULL; } /* In png we should find iHDR section and get data from it */ /* Skip signature and read header section */ p = data->begin + 12; - if (memcmp (p, "IHDR", 4) != 0) { - msg_info_pool ("png doesn't begins with IHDR section"); + if (memcmp(p, "IHDR", 4) != 0) { + msg_info_pool("png doesn't begins with IHDR section"); return NULL; } - img = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_image)); + img = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image)); img->type = IMAGE_TYPE_PNG; img->data = data; p += 4; - memcpy (&t, p, sizeof (guint32)); - img->width = ntohl (t); + memcpy(&t, p, sizeof(guint32)); + img->width = ntohl(t); p += 
4; - memcpy (&t, p, sizeof (guint32)); - img->height = ntohl (t); + memcpy(&t, p, sizeof(guint32)); + img->height = ntohl(t); return img; } static struct rspamd_image * -process_jpg_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) +process_jpg_image(rspamd_mempool_t *pool, rspamd_ftok_t *data) { const guint8 *p, *end; guint16 h, w; struct rspamd_image *img; - img = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_image)); + img = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image)); img->type = IMAGE_TYPE_JPG; img->data = data; @@ -158,11 +156,11 @@ process_jpg_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) if (p[0] == 0xFF && p[1] != 0xFF) { guint len = p[2] * 256 + p[3]; - p ++; + p++; if (*p == 0xc0 || *p == 0xc1 || *p == 0xc2 || *p == 0xc3 || - *p == 0xc9 || *p == 0xca || *p == 0xcb) { - memcpy (&h, p + 4, sizeof (guint16)); + *p == 0xc9 || *p == 0xca || *p == 0xcb) { + memcpy(&h, p + 4, sizeof(guint16)); h = p[4] * 0xff + p[5]; img->height = h; w = p[6] * 0xff + p[7]; @@ -183,50 +181,50 @@ process_jpg_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) } static struct rspamd_image * -process_gif_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) +process_gif_image(rspamd_mempool_t *pool, rspamd_ftok_t *data) { struct rspamd_image *img; const guint8 *p; guint16 t; if (data->len < 10) { - msg_info_pool ("bad gif detected (maybe striped)"); + msg_info_pool("bad gif detected (maybe striped)"); return NULL; } - img = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_image)); + img = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image)); img->type = IMAGE_TYPE_GIF; img->data = data; p = data->begin + 6; - memcpy (&t, p, sizeof (guint16)); - img->width = GUINT16_FROM_LE (t); - memcpy (&t, p + 2, sizeof (guint16)); - img->height = GUINT16_FROM_LE (t); + memcpy(&t, p, sizeof(guint16)); + img->width = GUINT16_FROM_LE(t); + memcpy(&t, p + 2, sizeof(guint16)); + img->height = GUINT16_FROM_LE(t); return img; } static struct rspamd_image * 
-process_bmp_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) +process_bmp_image(rspamd_mempool_t *pool, rspamd_ftok_t *data) { struct rspamd_image *img; gint32 t; const guint8 *p; if (data->len < 28) { - msg_info_pool ("bad bmp detected (maybe striped)"); + msg_info_pool("bad bmp detected (maybe striped)"); return NULL; } - img = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_image)); + img = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image)); img->type = IMAGE_TYPE_BMP; img->data = data; p = data->begin + 18; - memcpy (&t, p, sizeof (guint32)); - img->width = GUINT32_FROM_LE (t); - memcpy (&t, p + 4, sizeof (gint32)); - img->height = GUINT32_FROM_LE (t); + memcpy(&t, p, sizeof(guint32)); + img->width = GUINT32_FROM_LE(t); + memcpy(&t, p + 4, sizeof(gint32)); + img->height = GUINT32_FROM_LE(t); return img; } @@ -237,18 +235,18 @@ process_bmp_image (rspamd_mempool_t *pool, rspamd_ftok_t *data) * http://unix4lyfe.org/dct/ */ static void -rspamd_image_dct_block (gint pixels[8][8], gdouble *out) +rspamd_image_dct_block(gint pixels[8][8], gdouble *out) { gint i; gint rows[8][8]; static const gint c1 = 1004 /* cos(pi/16) << 10 */, - s1 = 200 /* sin(pi/16) */, - c3 = 851 /* cos(3pi/16) << 10 */, - s3 = 569 /* sin(3pi/16) << 10 */, - r2c6 = 554 /* sqrt(2)*cos(6pi/16) << 10 */, - r2s6 = 1337 /* sqrt(2)*sin(6pi/16) << 10 */, - r2 = 181; /* sqrt(2) << 7*/ + s1 = 200 /* sin(pi/16) */, + c3 = 851 /* cos(3pi/16) << 10 */, + s3 = 569 /* sin(3pi/16) << 10 */, + r2c6 = 554 /* sqrt(2)*cos(6pi/16) << 10 */, + r2s6 = 1337 /* sqrt(2)*sin(6pi/16) << 10 */, + r2 = 181; /* sqrt(2) << 7*/ gint x0, x1, x2, x3, x4, x5, x6, x7, x8; @@ -369,52 +367,52 @@ struct rspamd_image_cache_entry { }; static void -rspamd_image_cache_entry_dtor (gpointer p) +rspamd_image_cache_entry_dtor(gpointer p) { struct rspamd_image_cache_entry *entry = p; - g_free (entry); + g_free(entry); } static guint32 -rspamd_image_dct_hash (gconstpointer p) +rspamd_image_dct_hash(gconstpointer p) { - return 
rspamd_cryptobox_fast_hash (p, rspamd_cryptobox_HASHBYTES, - rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash(p, rspamd_cryptobox_HASHBYTES, + rspamd_hash_seed()); } static gboolean -rspamd_image_dct_equal (gconstpointer a, gconstpointer b) +rspamd_image_dct_equal(gconstpointer a, gconstpointer b) { - return memcmp (a, b, rspamd_cryptobox_HASHBYTES) == 0; + return memcmp(a, b, rspamd_cryptobox_HASHBYTES) == 0; } static void -rspamd_image_create_cache (struct rspamd_config *cfg) +rspamd_image_create_cache(struct rspamd_config *cfg) { - images_hash = rspamd_lru_hash_new_full (cfg->images_cache_size, NULL, - rspamd_image_cache_entry_dtor, - rspamd_image_dct_hash, rspamd_image_dct_equal); + images_hash = rspamd_lru_hash_new_full(cfg->images_cache_size, NULL, + rspamd_image_cache_entry_dtor, + rspamd_image_dct_hash, rspamd_image_dct_equal); } static gboolean -rspamd_image_check_hash (struct rspamd_task *task, struct rspamd_image *img) +rspamd_image_check_hash(struct rspamd_task *task, struct rspamd_image *img) { struct rspamd_image_cache_entry *found; if (images_hash == NULL) { - rspamd_image_create_cache (task->cfg); + rspamd_image_create_cache(task->cfg); } - found = rspamd_lru_hash_lookup (images_hash, img->parent->digest, - task->tv.tv_sec); + found = rspamd_lru_hash_lookup(images_hash, img->parent->digest, + task->tv.tv_sec); if (found) { /* We need to decompress */ - img->dct = g_malloc (RSPAMD_DCT_LEN / NBBY); - rspamd_mempool_add_destructor (task->task_pool, g_free, - img->dct); + img->dct = g_malloc(RSPAMD_DCT_LEN / NBBY); + rspamd_mempool_add_destructor(task->task_pool, g_free, + img->dct); /* Copy as found could be destroyed by LRU */ - memcpy (img->dct, found->dct, RSPAMD_DCT_LEN / NBBY); + memcpy(img->dct, found->dct, RSPAMD_DCT_LEN / NBBY); img->is_normalized = TRUE; return TRUE; @@ -424,29 +422,28 @@ rspamd_image_check_hash (struct rspamd_task *task, struct rspamd_image *img) } static void -rspamd_image_save_hash (struct rspamd_task *task, struct 
rspamd_image *img) +rspamd_image_save_hash(struct rspamd_task *task, struct rspamd_image *img) { struct rspamd_image_cache_entry *found; if (img->is_normalized) { - found = rspamd_lru_hash_lookup (images_hash, img->parent->digest, - task->tv.tv_sec); + found = rspamd_lru_hash_lookup(images_hash, img->parent->digest, + task->tv.tv_sec); if (!found) { - found = g_malloc0 (sizeof (*found)); - memcpy (found->dct, img->dct, RSPAMD_DCT_LEN / NBBY); - memcpy (found->digest, img->parent->digest, sizeof (found->digest)); + found = g_malloc0(sizeof(*found)); + memcpy(found->dct, img->dct, RSPAMD_DCT_LEN / NBBY); + memcpy(found->digest, img->parent->digest, sizeof(found->digest)); - rspamd_lru_hash_insert (images_hash, found->digest, found, - task->tv.tv_sec, 0); + rspamd_lru_hash_insert(images_hash, found->digest, found, + task->tv.tv_sec, 0); } } } #endif -void -rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img) +void rspamd_image_normalize(struct rspamd_task *task, struct rspamd_image *img) { #ifdef USABLE_GD gdImagePtr src = NULL, dst = NULL; @@ -458,7 +455,7 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img) } if (img->height <= RSPAMD_NORMALIZED_DIM || - img->width <= RSPAMD_NORMALIZED_DIM) { + img->width <= RSPAMD_NORMALIZED_DIM) { return; } @@ -466,43 +463,43 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img) return; } - if (rspamd_image_check_hash (task, img)) { + if (rspamd_image_check_hash(task, img)) { return; } switch (img->type) { case IMAGE_TYPE_JPG: - src = gdImageCreateFromJpegPtr (img->data->len, (void *)img->data->begin); + src = gdImageCreateFromJpegPtr(img->data->len, (void *) img->data->begin); break; case IMAGE_TYPE_PNG: - src = gdImageCreateFromPngPtr (img->data->len, (void *)img->data->begin); + src = gdImageCreateFromPngPtr(img->data->len, (void *) img->data->begin); break; case IMAGE_TYPE_GIF: - src = gdImageCreateFromGifPtr (img->data->len, (void *)img->data->begin); + 
src = gdImageCreateFromGifPtr(img->data->len, (void *) img->data->begin); break; case IMAGE_TYPE_BMP: - src = gdImageCreateFromBmpPtr (img->data->len, (void *)img->data->begin); + src = gdImageCreateFromBmpPtr(img->data->len, (void *) img->data->begin); break; default: return; } if (src == NULL) { - msg_info_task ("cannot load image of type %s from %T", - rspamd_image_type_str (img->type), img->filename); + msg_info_task("cannot load image of type %s from %T", + rspamd_image_type_str(img->type), img->filename); } else { - gdImageSetInterpolationMethod (src, GD_BILINEAR_FIXED); + gdImageSetInterpolationMethod(src, GD_BILINEAR_FIXED); - dst = gdImageScale (src, RSPAMD_NORMALIZED_DIM, RSPAMD_NORMALIZED_DIM); - gdImageGrayScale (dst); - gdImageDestroy (src); + dst = gdImageScale(src, RSPAMD_NORMALIZED_DIM, RSPAMD_NORMALIZED_DIM); + gdImageGrayScale(dst); + gdImageDestroy(src); img->is_normalized = TRUE; - dct = g_malloc0 (sizeof (gdouble) * RSPAMD_DCT_LEN); - img->dct = g_malloc0 (RSPAMD_DCT_LEN / NBBY); - rspamd_mempool_add_destructor (task->task_pool, g_free, - img->dct); + dct = g_malloc0(sizeof(gdouble) * RSPAMD_DCT_LEN); + img->dct = g_malloc0(RSPAMD_DCT_LEN / NBBY); + rspamd_mempool_add_destructor(task->task_pool, g_free, + img->dct); /* * Split message into blocks: @@ -525,73 +522,70 @@ rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img) for (j = 0; j < RSPAMD_NORMALIZED_DIM; j += 8) { gint p[8][8]; - for (k = 0; k < 8; k ++) { - p[k][0] = gdImageGetPixel (dst, i + k, j); - p[k][1] = gdImageGetPixel (dst, i + k, j + 1); - p[k][2] = gdImageGetPixel (dst, i + k, j + 2); - p[k][3] = gdImageGetPixel (dst, i + k, j + 3); - p[k][4] = gdImageGetPixel (dst, i + k, j + 4); - p[k][5] = gdImageGetPixel (dst, i + k, j + 5); - p[k][6] = gdImageGetPixel (dst, i + k, j + 6); - p[k][7] = gdImageGetPixel (dst, i + k, j + 7); + for (k = 0; k < 8; k++) { + p[k][0] = gdImageGetPixel(dst, i + k, j); + p[k][1] = gdImageGetPixel(dst, i + k, j + 1); + p[k][2] = 
gdImageGetPixel(dst, i + k, j + 2); + p[k][3] = gdImageGetPixel(dst, i + k, j + 3); + p[k][4] = gdImageGetPixel(dst, i + k, j + 4); + p[k][5] = gdImageGetPixel(dst, i + k, j + 5); + p[k][6] = gdImageGetPixel(dst, i + k, j + 6); + p[k][7] = gdImageGetPixel(dst, i + k, j + 7); } - rspamd_image_dct_block (p, - dct + i * RSPAMD_NORMALIZED_DIM + j); + rspamd_image_dct_block(p, + dct + i * RSPAMD_NORMALIZED_DIM + j); gdouble avg = 0.0; - for (k = 0; k < 8; k ++) { - for (l = 0; l < 8; l ++) { + for (k = 0; k < 8; k++) { + for (l = 0; l < 8; l++) { gdouble x = *(dct + - i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l); - avg += (x - avg) / (gdouble)(k * 8 + l + 1); + i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l); + avg += (x - avg) / (gdouble) (k * 8 + l + 1); } - } - for (k = 0; k < 8; k ++) { - for (l = 0; l < 8; l ++) { + for (k = 0; k < 8; k++) { + for (l = 0; l < 8; l++) { guint idx = i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l; if (dct[idx] >= avg) { - setbit (img->dct, idx); + setbit(img->dct, idx); } } } - - } } - gdImageDestroy (dst); - g_free (dct); - rspamd_image_save_hash (task, img); + gdImageDestroy(dst); + g_free(dct); + rspamd_image_save_hash(task, img); } #endif } -struct rspamd_image* -rspamd_maybe_process_image (rspamd_mempool_t *pool, - rspamd_ftok_t *data) +struct rspamd_image * +rspamd_maybe_process_image(rspamd_mempool_t *pool, + rspamd_ftok_t *data) { enum rspamd_image_type type; struct rspamd_image *img = NULL; - if ((type = detect_image_type (data)) != IMAGE_TYPE_UNKNOWN) { + if ((type = detect_image_type(data)) != IMAGE_TYPE_UNKNOWN) { switch (type) { case IMAGE_TYPE_PNG: - img = process_png_image (pool, data); + img = process_png_image(pool, data); break; case IMAGE_TYPE_JPG: - img = process_jpg_image (pool, data); + img = process_jpg_image(pool, data); break; case IMAGE_TYPE_GIF: - img = process_gif_image (pool, data); + img = process_gif_image(pool, data); break; case IMAGE_TYPE_BMP: - img = process_bmp_image (pool, data); + img = 
process_bmp_image(pool, data); break; default: img = NULL; @@ -603,16 +597,16 @@ rspamd_maybe_process_image (rspamd_mempool_t *pool, } static bool -process_image (struct rspamd_task *task, struct rspamd_mime_part *part) +process_image(struct rspamd_task *task, struct rspamd_mime_part *part) { struct rspamd_image *img; - img = rspamd_maybe_process_image (task->task_pool, &part->parsed_data); + img = rspamd_maybe_process_image(task->task_pool, &part->parsed_data); if (img != NULL) { - msg_debug_images ("detected %s image of size %ud x %ud", - rspamd_image_type_str (img->type), - img->width, img->height); + msg_debug_images("detected %s image of size %ud x %ud", + rspamd_image_type_str(img->type), + img->width, img->height); if (part->cd) { img->filename = &part->cd->filename; @@ -630,7 +624,7 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part) } const gchar * -rspamd_image_type_str (enum rspamd_image_type type) +rspamd_image_type_str(enum rspamd_image_type type) { switch (type) { case IMAGE_TYPE_PNG: @@ -653,7 +647,7 @@ rspamd_image_type_str (enum rspamd_image_type type) } static void -rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *part) +rspamd_image_process_part(struct rspamd_task *task, struct rspamd_mime_part *part) { struct rspamd_mime_header *rh; struct rspamd_mime_text_part *tp; @@ -662,37 +656,38 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa guint cid_len, i; struct rspamd_image *img; - img = (struct rspamd_image *)part->specific.img; + img = (struct rspamd_image *) part->specific.img; if (img) { /* Check Content-Id */ rh = rspamd_message_get_header_from_hash(part->raw_headers, - "Content-Id", FALSE); + "Content-Id", FALSE); if (rh) { cid = rh->decoded; if (*cid == '<') { - cid ++; + cid++; } - cid_len = strlen (cid); + cid_len = strlen(cid); if (cid_len > 0) { if (cid[cid_len - 1] == '>') { - cid_len --; + cid_len--; } - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, 
text_parts), i, tp) { - if (IS_TEXT_PART_HTML (tp) && tp->html != NULL) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp) + { + if (IS_TEXT_PART_HTML(tp) && tp->html != NULL) { himg = rspamd_html_find_embedded_image(tp->html, cid, cid_len); if (himg != NULL) { img->html_image = himg; himg->embedded_image = img; - msg_debug_images ("found linked image by cid: <%s>", - cid); + msg_debug_images("found linked image by cid: <%s>", + cid); if (himg->height == 0) { himg->height = img->height; @@ -709,15 +704,15 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa } } -void -rspamd_images_link (struct rspamd_task *task) +void rspamd_images_link(struct rspamd_task *task) { struct rspamd_mime_part *part; guint i; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { if (part->part_type == RSPAMD_MIME_PART_IMAGE) { - rspamd_image_process_part (task, part); + rspamd_image_process_part(task, part); } } }
\ No newline at end of file diff --git a/src/libmime/images.h b/src/libmime/images.h index 887f30a37..bf8b3be39 100644 --- a/src/libmime/images.h +++ b/src/libmime/images.h @@ -4,7 +4,7 @@ #include "config.h" #include "fstring.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -37,7 +37,7 @@ struct rspamd_image { /* * Process images from a worker task */ -void rspamd_images_process (struct rspamd_task *task); +void rspamd_images_process(struct rspamd_task *task); /** * Process image if possible in a single mime part @@ -45,13 +45,13 @@ void rspamd_images_process (struct rspamd_task *task); * @param part * @return */ -bool rspamd_images_process_mime_part_maybe (struct rspamd_task *task, - struct rspamd_mime_part *part); +bool rspamd_images_process_mime_part_maybe(struct rspamd_task *task, + struct rspamd_mime_part *part); /* * Link embedded images to the HTML parts */ -void rspamd_images_link (struct rspamd_task *task); +void rspamd_images_link(struct rspamd_task *task); /** * Processes image in raw data @@ -59,17 +59,17 @@ void rspamd_images_link (struct rspamd_task *task); * @param data * @return */ -struct rspamd_image *rspamd_maybe_process_image (rspamd_mempool_t *pool, - rspamd_ftok_t *data); +struct rspamd_image *rspamd_maybe_process_image(rspamd_mempool_t *pool, + rspamd_ftok_t *data); /* * Get textual representation of an image's type */ -const gchar *rspamd_image_type_str (enum rspamd_image_type type); +const gchar *rspamd_image_type_str(enum rspamd_image_type type); -void rspamd_image_normalize (struct rspamd_task *task, struct rspamd_image *img); +void rspamd_image_normalize(struct rspamd_task *task, struct rspamd_image *img); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c index 4d9e1ae68..52221cd32 100644 --- a/src/libmime/lang_detection.c +++ b/src/libmime/lang_detection.c @@ -47,29 +47,28 @@ struct rspamd_language_unicode_match { * List of languages detected by 
unicode scripts */ static const struct rspamd_language_unicode_match unicode_langs[] = { - {"el", RSPAMD_UNICODE_GREEK}, - {"ml", RSPAMD_UNICODE_MALAYALAM}, - {"te", RSPAMD_UNICODE_TELUGU}, - {"ta", RSPAMD_UNICODE_TAMIL}, - {"gu", RSPAMD_UNICODE_GUJARATI}, - {"th", RSPAMD_UNICODE_THAI}, - {"ka", RSPAMD_UNICODE_GEORGIAN}, - {"si", RSPAMD_UNICODE_SINHALA}, - {"hy", RSPAMD_UNICODE_ARMENIAN}, - {"ja", RSPAMD_UNICODE_JP}, - {"ko", RSPAMD_UNICODE_HANGUL}, + {"el", RSPAMD_UNICODE_GREEK}, + {"ml", RSPAMD_UNICODE_MALAYALAM}, + {"te", RSPAMD_UNICODE_TELUGU}, + {"ta", RSPAMD_UNICODE_TAMIL}, + {"gu", RSPAMD_UNICODE_GUJARATI}, + {"th", RSPAMD_UNICODE_THAI}, + {"ka", RSPAMD_UNICODE_GEORGIAN}, + {"si", RSPAMD_UNICODE_SINHALA}, + {"hy", RSPAMD_UNICODE_ARMENIAN}, + {"ja", RSPAMD_UNICODE_JP}, + {"ko", RSPAMD_UNICODE_HANGUL}, }; /* * Top languages */ static const gchar *tier0_langs[] = { - "en", + "en", }; static const gchar *tier1_langs[] = { - "fr", "it", "de", "es", "nl", - "pt", "ru", "pl", "tk", "th", "ar" -}; + "fr", "it", "de", "es", "nl", + "pt", "ru", "pl", "tk", "th", "ar"}; enum rspamd_language_category { RSPAMD_LANGUAGE_LATIN = 0, @@ -81,7 +80,7 @@ enum rspamd_language_category { struct rspamd_language_elt { const gchar *name; /* e.g. "en" or "ru" */ - gint flags; /* enum rspamd_language_elt_flags */ + gint flags; /* enum rspamd_language_elt_flags */ enum rspamd_language_category category; guint trigrams_words; guint stop_words; @@ -113,25 +112,25 @@ struct rspamd_stop_word_elt { GArray *ranges; /* of rspamd_stop_word_range */ }; -#define msg_debug_lang_det(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) -#define msg_debug_lang_det_cfg(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_langdet_log_id, "langdet", cfg->cfg_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) +#define msg_debug_lang_det(...) 
rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) +#define msg_debug_lang_det_cfg(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_langdet_log_id, "langdet", cfg->cfg_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) INIT_LOG_MODULE_PUBLIC(langdet) static const struct rspamd_language_unicode_match * -rspamd_language_search_unicode_match (const gchar *key, - const struct rspamd_language_unicode_match *elts, size_t nelts) +rspamd_language_search_unicode_match(const gchar *key, + const struct rspamd_language_unicode_match *elts, size_t nelts) { size_t i; for (i = 0; i < nelts; i++) { - if (strcmp (elts[i].lang, key) == 0) { + if (strcmp(elts[i].lang, key) == 0) { return &elts[i]; } } @@ -140,12 +139,12 @@ rspamd_language_search_unicode_match (const gchar *key, } static gboolean -rspamd_language_search_str (const gchar *key, const gchar *elts[], size_t nelts) +rspamd_language_search_str(const gchar *key, const gchar *elts[], size_t nelts) { size_t i; for (i = 0; i < nelts; i++) { - if (strcmp (elts[i], key) == 0) { + if (strcmp(elts[i], key) == 0) { return TRUE; } } @@ -153,34 +152,34 @@ rspamd_language_search_str (const gchar *key, const gchar *elts[], size_t nelts) } static guint -rspamd_trigram_hash_func (gconstpointer key) +rspamd_trigram_hash_func(gconstpointer key) { - return rspamd_cryptobox_fast_hash (key, 3 * sizeof (UChar32), - rspamd_hash_seed ()); + return rspamd_cryptobox_fast_hash(key, 3 * sizeof(UChar32), + rspamd_hash_seed()); } static gboolean -rspamd_trigram_equal_func (gconstpointer v, gconstpointer v2) +rspamd_trigram_equal_func(gconstpointer v, gconstpointer v2) { - return memcmp (v, v2, 3 * sizeof (UChar32)) == 0; + return memcmp(v, v2, 3 * sizeof(UChar32)) == 0; } -KHASH_INIT (rspamd_trigram_hash, const UChar32 *, struct rspamd_ngramm_chain, true, - rspamd_trigram_hash_func, rspamd_trigram_equal_func); -KHASH_INIT (rspamd_candidates_hash, const gchar *, 
- struct rspamd_lang_detector_res *, true, - rspamd_str_hash, rspamd_str_equal); -KHASH_INIT (rspamd_stopwords_hash, rspamd_ftok_t *, - char, false, - rspamd_ftok_hash, rspamd_ftok_equal); - -KHASH_INIT (rspamd_languages_hash, const gchar *, struct rspamd_language_elt *, true, - rspamd_str_hash, rspamd_str_equal); +KHASH_INIT(rspamd_trigram_hash, const UChar32 *, struct rspamd_ngramm_chain, true, + rspamd_trigram_hash_func, rspamd_trigram_equal_func); +KHASH_INIT(rspamd_candidates_hash, const gchar *, + struct rspamd_lang_detector_res *, true, + rspamd_str_hash, rspamd_str_equal); +KHASH_INIT(rspamd_stopwords_hash, rspamd_ftok_t *, + char, false, + rspamd_ftok_hash, rspamd_ftok_equal); + +KHASH_INIT(rspamd_languages_hash, const gchar *, struct rspamd_language_elt *, true, + rspamd_str_hash, rspamd_str_equal); struct rspamd_lang_detector { - khash_t(rspamd_languages_hash) *languages; - khash_t(rspamd_trigram_hash) *trigrams[RSPAMD_LANGUAGE_MAX]; /* trigrams frequencies */ + khash_t(rspamd_languages_hash) * languages; + khash_t(rspamd_trigram_hash) * trigrams[RSPAMD_LANGUAGE_MAX]; /* trigrams frequencies */ struct rspamd_stop_word_elt stop_words[RSPAMD_LANGUAGE_MAX]; - khash_t(rspamd_stopwords_hash) *stop_words_norm; + khash_t(rspamd_stopwords_hash) * stop_words_norm; UConverter *uchar_converter; gsize short_text_limit; bool prefer_fasttext; @@ -190,23 +189,23 @@ struct rspamd_lang_detector { }; static void -rspamd_language_detector_ucs_lowercase (UChar32 *s, gsize len) +rspamd_language_detector_ucs_lowercase(UChar32 *s, gsize len) { gsize i; - for (i = 0; i < len; i ++) { - s[i] = u_tolower (s[i]); + for (i = 0; i < len; i++) { + s[i] = u_tolower(s[i]); } } static gboolean -rspamd_language_detector_ucs_is_latin (const UChar32 *s, gsize len) +rspamd_language_detector_ucs_is_latin(const UChar32 *s, gsize len) { gsize i; gboolean ret = TRUE; - for (i = 0; i < len; i ++) { - if (s[i] >= 128 || !(g_ascii_isalnum (s[i]) || s[i] == ' ')) { + for (i = 0; i < len; i++) { + 
if (s[i] >= 128 || !(g_ascii_isalnum(s[i]) || s[i] == ' ')) { ret = FALSE; break; } @@ -222,14 +221,14 @@ struct rspamd_language_ucs_elt { }; static void -rspamd_language_detector_init_ngramm (struct rspamd_config *cfg, - struct rspamd_lang_detector *d, - struct rspamd_language_elt *lelt, - struct rspamd_language_ucs_elt *ucs, - guint len, - guint freq, - guint total, - khash_t (rspamd_trigram_hash) *htb) +rspamd_language_detector_init_ngramm(struct rspamd_config *cfg, + struct rspamd_lang_detector *d, + struct rspamd_language_elt *lelt, + struct rspamd_language_ucs_elt *ucs, + guint len, + guint freq, + guint total, + khash_t(rspamd_trigram_hash) * htb) { struct rspamd_ngramm_chain *chain = NULL, st_chain; struct rspamd_ngramm_elt *elt; @@ -240,58 +239,59 @@ rspamd_language_detector_init_ngramm (struct rspamd_config *cfg, switch (len) { case 1: case 2: - g_assert_not_reached (); + g_assert_not_reached(); break; case 3: - k = kh_get (rspamd_trigram_hash, htb, ucs->s); - if (k != kh_end (htb)) { - chain = &kh_value (htb, k); + k = kh_get(rspamd_trigram_hash, htb, ucs->s); + if (k != kh_end(htb)) { + chain = &kh_value(htb, k); } break; default: - g_assert_not_reached (); + g_assert_not_reached(); break; } if (chain == NULL) { /* New element */ chain = &st_chain; - memset (chain, 0, sizeof (st_chain)); - chain->languages = g_ptr_array_sized_new (32); - rspamd_mempool_add_destructor (cfg->cfg_pool, rspamd_ptr_array_free_hard, - chain->languages); - chain->utf = rspamd_mempool_strdup (cfg->cfg_pool, ucs->utf); - elt = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (*elt)); + memset(chain, 0, sizeof(st_chain)); + chain->languages = g_ptr_array_sized_new(32); + rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard, + chain->languages); + chain->utf = rspamd_mempool_strdup(cfg->cfg_pool, ucs->utf); + elt = rspamd_mempool_alloc(cfg->cfg_pool, sizeof(*elt)); elt->elt = lelt; - elt->prob = ((gdouble)freq) / ((gdouble)total); - g_ptr_array_add (chain->languages, 
elt); + elt->prob = ((gdouble) freq) / ((gdouble) total); + g_ptr_array_add(chain->languages, elt); - k = kh_put (rspamd_trigram_hash, htb, ucs->s, &i); - kh_value (htb, k) = *chain; + k = kh_put(rspamd_trigram_hash, htb, ucs->s, &i); + kh_value(htb, k) = *chain; } else { /* Check sanity */ found = FALSE; - PTR_ARRAY_FOREACH (chain->languages, i, elt) { - if (strcmp (elt->elt->name, lelt->name) == 0) { + PTR_ARRAY_FOREACH(chain->languages, i, elt) + { + if (strcmp(elt->elt->name, lelt->name) == 0) { found = TRUE; - elt->prob += ((gdouble)freq) / ((gdouble)total); + elt->prob += ((gdouble) freq) / ((gdouble) total); break; } } if (!found) { - elt = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (*elt)); + elt = rspamd_mempool_alloc(cfg->cfg_pool, sizeof(*elt)); elt->elt = lelt; - elt->prob = ((gdouble)freq) / ((gdouble)total); - g_ptr_array_add (chain->languages, elt); + elt->prob = ((gdouble) freq) / ((gdouble) total); + g_ptr_array_add(chain->languages, elt); } } } static inline enum rspamd_language_category -rspamd_language_detector_get_category (guint uflags) +rspamd_language_detector_get_category(guint uflags) { enum rspamd_language_category cat = RSPAMD_LANGUAGE_LATIN; @@ -309,19 +309,19 @@ rspamd_language_detector_get_category (guint uflags) } static const gchar * -rspamd_language_detector_print_flags (struct rspamd_language_elt *elt) +rspamd_language_detector_print_flags(struct rspamd_language_elt *elt) { static gchar flags_buf[256]; goffset r = 0; if (elt->flags & RS_LANGUAGE_TIER1) { - r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "tier1,"); + r += rspamd_snprintf(flags_buf + r, sizeof(flags_buf) - r, "tier1,"); } if (elt->flags & RS_LANGUAGE_TIER0) { - r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "tier0,"); + r += rspamd_snprintf(flags_buf + r, sizeof(flags_buf) - r, "tier0,"); } if (elt->flags & RS_LANGUAGE_LATIN) { - r += rspamd_snprintf (flags_buf + r, sizeof (flags_buf) - r, "latin,"); + r += rspamd_snprintf(flags_buf + r, 
sizeof(flags_buf) - r, "latin,"); } if (r > 0) { @@ -335,19 +335,19 @@ rspamd_language_detector_print_flags (struct rspamd_language_elt *elt) } static gint -rspamd_language_detector_cmp_ngramm (gconstpointer a, gconstpointer b) +rspamd_language_detector_cmp_ngramm(gconstpointer a, gconstpointer b) { - struct rspamd_language_ucs_elt *e1 = *(struct rspamd_language_ucs_elt **)a; - struct rspamd_language_ucs_elt *e2 = *(struct rspamd_language_ucs_elt **)b; + struct rspamd_language_ucs_elt *e1 = *(struct rspamd_language_ucs_elt **) a; + struct rspamd_language_ucs_elt *e2 = *(struct rspamd_language_ucs_elt **) b; - return (gint)e2->freq - (gint)e1->freq; + return (gint) e2->freq - (gint) e1->freq; } static void -rspamd_language_detector_read_file (struct rspamd_config *cfg, - struct rspamd_lang_detector *d, - const gchar *path, - const ucl_object_t *stop_words) +rspamd_language_detector_read_file(struct rspamd_config *cfg, + struct rspamd_lang_detector *d, + const gchar *path, + const ucl_object_t *stop_words) { struct ucl_parser *parser; ucl_object_t *top; @@ -356,110 +356,110 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, UErrorCode uc_err = U_ZERO_ERROR; struct rspamd_language_elt *nelt; struct rspamd_language_ucs_elt *ucs_elt; - khash_t (rspamd_trigram_hash) *htb = NULL; + khash_t(rspamd_trigram_hash) *htb = NULL; gchar *pos; guint total = 0, total_latin = 0, total_ngramms = 0, i, skipped, - loaded, nstop = 0; + loaded, nstop = 0; gdouble mean = 0, std = 0, delta = 0, delta2 = 0, m2 = 0; enum rspamd_language_category cat = RSPAMD_LANGUAGE_MAX; - parser = ucl_parser_new (UCL_PARSER_NO_FILEVARS); - if (!ucl_parser_add_file (parser, path)) { - msg_warn_config ("cannot parse file %s: %s", path, - ucl_parser_get_error (parser)); - ucl_parser_free (parser); + parser = ucl_parser_new(UCL_PARSER_NO_FILEVARS); + if (!ucl_parser_add_file(parser, path)) { + msg_warn_config("cannot parse file %s: %s", path, + ucl_parser_get_error(parser)); + 
ucl_parser_free(parser); return; } - top = ucl_parser_get_object (parser); - ucl_parser_free (parser); + top = ucl_parser_get_object(parser); + ucl_parser_free(parser); - freqs = ucl_object_lookup (top, "freq"); + freqs = ucl_object_lookup(top, "freq"); if (freqs == NULL) { - msg_warn_config ("file %s has no 'freq' key", path); - ucl_object_unref (top); + msg_warn_config("file %s has no 'freq' key", path); + ucl_object_unref(top); return; } - pos = strrchr (path, '/'); - g_assert (pos != NULL); - nelt = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*nelt)); - nelt->name = rspamd_mempool_strdup (cfg->cfg_pool, pos + 1); + pos = strrchr(path, '/'); + g_assert(pos != NULL); + nelt = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(*nelt)); + nelt->name = rspamd_mempool_strdup(cfg->cfg_pool, pos + 1); /* Remove extension */ - pos = strchr (nelt->name, '.'); - g_assert (pos != NULL); + pos = strchr(nelt->name, '.'); + g_assert(pos != NULL); *pos = '\0'; - n_words = ucl_object_lookup (top, "n_words"); + n_words = ucl_object_lookup(top, "n_words"); - if (n_words == NULL || ucl_object_type (n_words) != UCL_ARRAY || - n_words->len != 3) { - msg_warn_config ("cannot find n_words in language %s", nelt->name); - ucl_object_unref (top); + if (n_words == NULL || ucl_object_type(n_words) != UCL_ARRAY || + n_words->len != 3) { + msg_warn_config("cannot find n_words in language %s", nelt->name); + ucl_object_unref(top); return; } else { - nelt->trigrams_words = ucl_object_toint (ucl_array_find_index (n_words, - 2)); + nelt->trigrams_words = ucl_object_toint(ucl_array_find_index(n_words, + 2)); } - type = ucl_object_lookup (top, "type"); + type = ucl_object_lookup(top, "type"); - if (type == NULL || ucl_object_type (type) != UCL_STRING) { - msg_debug_config ("cannot find type in language %s", nelt->name); - ucl_object_unref (top); + if (type == NULL || ucl_object_type(type) != UCL_STRING) { + msg_debug_config("cannot find type in language %s", nelt->name); + ucl_object_unref(top); 
return; } else { - const gchar *stype = ucl_object_tostring (type); + const gchar *stype = ucl_object_tostring(type); - if (strcmp (stype, "latin") == 0) { + if (strcmp(stype, "latin") == 0) { cat = RSPAMD_LANGUAGE_LATIN; } - else if (strcmp (stype, "cyrillic") == 0) { + else if (strcmp(stype, "cyrillic") == 0) { cat = RSPAMD_LANGUAGE_CYRILLIC; } - else if (strcmp (stype, "arab") == 0) { + else if (strcmp(stype, "arab") == 0) { cat = RSPAMD_LANGUAGE_ARAB; } - else if (strcmp (stype, "devanagari") == 0) { + else if (strcmp(stype, "devanagari") == 0) { cat = RSPAMD_LANGUAGE_DEVANAGARI; } else { - msg_debug_config ("unknown type %s of language %s", stype, nelt->name); - ucl_object_unref (top); + msg_debug_config("unknown type %s of language %s", stype, nelt->name); + ucl_object_unref(top); return; } } - flags = ucl_object_lookup (top, "flags"); + flags = ucl_object_lookup(top, "flags"); - if (flags != NULL && ucl_object_type (flags) == UCL_ARRAY) { + if (flags != NULL && ucl_object_type(flags) == UCL_ARRAY) { ucl_object_iter_t it = NULL; const ucl_object_t *cur; - while ((cur = ucl_object_iterate (flags, &it, true)) != NULL) { - const gchar *fl = ucl_object_tostring (cur); + while ((cur = ucl_object_iterate(flags, &it, true)) != NULL) { + const gchar *fl = ucl_object_tostring(cur); if (cur) { - if (strcmp (fl, "diacritics") == 0) { + if (strcmp(fl, "diacritics") == 0) { nelt->flags |= RS_LANGUAGE_DIACRITICS; } - else if (strcmp (fl, "ascii") == 0) { + else if (strcmp(fl, "ascii") == 0) { nelt->flags |= RS_LANGUAGE_ASCII; } else { - msg_debug_config ("unknown flag %s of language %s", fl, nelt->name); + msg_debug_config("unknown flag %s of language %s", fl, nelt->name); } } else { - msg_debug_config ("unknown flags type of language %s", nelt->name); + msg_debug_config("unknown flags type of language %s", nelt->name); } } } @@ -467,7 +467,7 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, if (stop_words) { const ucl_object_t *specific_stop_words; - 
specific_stop_words = ucl_object_lookup (stop_words, nelt->name); + specific_stop_words = ucl_object_lookup(stop_words, nelt->name); if (specific_stop_words) { struct sb_stemmer *stem = NULL; @@ -475,33 +475,33 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, const ucl_object_t *w; guint start, stop; - stem = sb_stemmer_new (nelt->name, "UTF_8"); - start = rspamd_multipattern_get_npatterns (d->stop_words[cat].mp); + stem = sb_stemmer_new(nelt->name, "UTF_8"); + start = rspamd_multipattern_get_npatterns(d->stop_words[cat].mp); - while ((w = ucl_object_iterate (specific_stop_words, &it, true)) != NULL) { + while ((w = ucl_object_iterate(specific_stop_words, &it, true)) != NULL) { gsize wlen; - const char *word = ucl_object_tolstring (w, &wlen); + const char *word = ucl_object_tolstring(w, &wlen); const char *saved; - guint mp_flags = RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8; + guint mp_flags = RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8; - if (rspamd_multipattern_has_hyperscan ()) { + if (rspamd_multipattern_has_hyperscan()) { mp_flags |= RSPAMD_MULTIPATTERN_RE; } - rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp, - word, wlen, - mp_flags); - nelt->stop_words ++; - nstop ++; + rspamd_multipattern_add_pattern_len(d->stop_words[cat].mp, + word, wlen, + mp_flags); + nelt->stop_words++; + nstop++; /* Also lemmatise and store normalised */ if (stem) { - const char *nw = sb_stemmer_stem (stem, word, wlen); + const char *nw = sb_stemmer_stem(stem, word, wlen); if (nw) { saved = nw; - wlen = strlen (nw); + wlen = strlen(nw); } else { saved = word; @@ -516,23 +516,23 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, rspamd_ftok_t *tok; gchar *dst; - tok = rspamd_mempool_alloc (cfg->cfg_pool, - sizeof (*tok) + wlen + 1); - dst = ((gchar *)tok) + sizeof (*tok); - rspamd_strlcpy (dst, saved, wlen + 1); + tok = rspamd_mempool_alloc(cfg->cfg_pool, + sizeof(*tok) + wlen + 1); + dst = ((gchar *) tok) + sizeof(*tok); + 
rspamd_strlcpy(dst, saved, wlen + 1); tok->begin = dst; tok->len = wlen; - kh_put (rspamd_stopwords_hash, d->stop_words_norm, - tok, &rc); + kh_put(rspamd_stopwords_hash, d->stop_words_norm, + tok, &rc); } } if (stem) { - sb_stemmer_delete (stem); + sb_stemmer_delete(stem); } - stop = rspamd_multipattern_get_npatterns (d->stop_words[cat].mp); + stop = rspamd_multipattern_get_npatterns(d->stop_words[cat].mp); struct rspamd_stop_word_range r; @@ -540,7 +540,7 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, r.stop = stop; r.elt = nelt; - g_array_append_val (d->stop_words[cat].ranges, r); + g_array_append_val(d->stop_words[cat].ranges, r); it = NULL; } } @@ -551,31 +551,31 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, GPtrArray *ngramms; guint nsym; - if (rspamd_language_search_str (nelt->name, tier1_langs, - G_N_ELEMENTS (tier1_langs))) { + if (rspamd_language_search_str(nelt->name, tier1_langs, + G_N_ELEMENTS(tier1_langs))) { nelt->flags |= RS_LANGUAGE_TIER1; } - if (rspamd_language_search_str (nelt->name, tier0_langs, - G_N_ELEMENTS (tier0_langs))) { + if (rspamd_language_search_str(nelt->name, tier0_langs, + G_N_ELEMENTS(tier0_langs))) { nelt->flags |= RS_LANGUAGE_TIER0; } it = NULL; - ngramms = g_ptr_array_sized_new (freqs->len); + ngramms = g_ptr_array_sized_new(freqs->len); i = 0; skipped = 0; loaded = 0; - while ((cur = ucl_object_iterate (freqs, &it, true)) != NULL) { + while ((cur = ucl_object_iterate(freqs, &it, true)) != NULL) { const gchar *key; gsize keylen; guint freq; - key = ucl_object_keyl (cur, &keylen); - freq = ucl_object_toint (cur); + key = ucl_object_keyl(cur, &keylen); + freq = ucl_object_toint(cur); - i ++; + i++; delta = freq - mean; mean += delta / i; delta2 = freq - mean; @@ -585,41 +585,41 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, UChar32 *cur_ucs; const char *end = key + keylen, *cur_utf = key; - ucs_elt = rspamd_mempool_alloc (cfg->cfg_pool, - sizeof (*ucs_elt) + (keylen + 1) 
* sizeof (UChar32)); + ucs_elt = rspamd_mempool_alloc(cfg->cfg_pool, + sizeof(*ucs_elt) + (keylen + 1) * sizeof(UChar32)); cur_ucs = ucs_elt->s; nsym = 0; uc_err = U_ZERO_ERROR; while (cur_utf < end) { - *cur_ucs++ = ucnv_getNextUChar (d->uchar_converter, &cur_utf, - end, &uc_err); - if (!U_SUCCESS (uc_err)) { + *cur_ucs++ = ucnv_getNextUChar(d->uchar_converter, &cur_utf, + end, &uc_err); + if (!U_SUCCESS(uc_err)) { break; } - nsym ++; + nsym++; } - if (!U_SUCCESS (uc_err)) { - msg_warn_config ("cannot convert key %*s to unicode: %s", - (gint)keylen, key, u_errorName (uc_err)); + if (!U_SUCCESS(uc_err)) { + msg_warn_config("cannot convert key %*s to unicode: %s", + (gint) keylen, key, u_errorName(uc_err)); continue; } ucs_elt->utf = key; - rspamd_language_detector_ucs_lowercase (ucs_elt->s, nsym); + rspamd_language_detector_ucs_lowercase(ucs_elt->s, nsym); if (nsym == 3) { - g_ptr_array_add (ngramms, ucs_elt); + g_ptr_array_add(ngramms, ucs_elt); } else { continue; } - if (rspamd_language_detector_ucs_is_latin (ucs_elt->s, nsym)) { + if (rspamd_language_detector_ucs_is_latin(ucs_elt->s, nsym)) { total_latin++; } @@ -629,7 +629,7 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, } } - std = sqrt (m2 / (i - 1)); + std = sqrt(m2 / (i - 1)); if (total_latin >= total_ngramms / 3) { nelt->flags |= RS_LANGUAGE_LATIN; @@ -638,66 +638,68 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg, nsym = 3; total = 0; - PTR_ARRAY_FOREACH (ngramms, i, ucs_elt) { + PTR_ARRAY_FOREACH(ngramms, i, ucs_elt) + { if (!(nelt->flags & RS_LANGUAGE_LATIN) && - rspamd_language_detector_ucs_is_latin (ucs_elt->s, nsym)) { + rspamd_language_detector_ucs_is_latin(ucs_elt->s, nsym)) { ucs_elt->freq = 0; /* Skip latin ngramm for non-latin language to avoid garbage */ - skipped ++; + skipped++; continue; } /* Now, discriminate low frequency ngramms */ total += ucs_elt->freq; - loaded ++; + loaded++; } - g_ptr_array_sort (ngramms, rspamd_language_detector_cmp_ngramm); + 
g_ptr_array_sort(ngramms, rspamd_language_detector_cmp_ngramm); - PTR_ARRAY_FOREACH (ngramms, i, ucs_elt) { + PTR_ARRAY_FOREACH(ngramms, i, ucs_elt) + { if (ucs_elt->freq > 0) { - rspamd_language_detector_init_ngramm (cfg, d, - nelt, ucs_elt, nsym, - ucs_elt->freq, total, htb); + rspamd_language_detector_init_ngramm(cfg, d, + nelt, ucs_elt, nsym, + ucs_elt->freq, total, htb); } } #ifdef EXTRA_LANGDET_DEBUG /* Useful for debug */ - for (i = 0; i < 10; i ++) { - ucs_elt = g_ptr_array_index (ngramms, i); + for (i = 0; i < 10; i++) { + ucs_elt = g_ptr_array_index(ngramms, i); - msg_debug_lang_det_cfg ("%s -> %s: %d", nelt->name, - ucs_elt->utf, ucs_elt->freq); - } + msg_debug_lang_det_cfg("%s -> %s: %d", nelt->name, + ucs_elt->utf, ucs_elt->freq); + } #endif - g_ptr_array_free (ngramms, TRUE); + g_ptr_array_free(ngramms, TRUE); nelt->mean = mean; nelt->std = std; - msg_debug_lang_det_cfg ("loaded %s language, %d trigrams, " - "%d ngramms loaded; " - "std=%.2f, mean=%.2f, skipped=%d, loaded=%d, stop_words=%d; " - "(%s)", - nelt->name, - (gint)nelt->trigrams_words, - total, - std, mean, - skipped, loaded, nelt->stop_words, - rspamd_language_detector_print_flags (nelt)); + msg_debug_lang_det_cfg("loaded %s language, %d trigrams, " + "%d ngramms loaded; " + "std=%.2f, mean=%.2f, skipped=%d, loaded=%d, stop_words=%d; " + "(%s)", + nelt->name, + (gint) nelt->trigrams_words, + total, + std, mean, + skipped, loaded, nelt->stop_words, + rspamd_language_detector_print_flags(nelt)); int ret; khiter_t k = kh_put(rspamd_languages_hash, d->languages, nelt->name, &ret); - g_assert (ret > 0); /* must be unique */ + g_assert(ret > 0); /* must be unique */ kh_value(d->languages, k) = nelt; - ucl_object_unref (top); + ucl_object_unref(top); } static gboolean -rspamd_ucl_array_find_str (const gchar *str, const ucl_object_t *ar) +rspamd_ucl_array_find_str(const gchar *str, const ucl_object_t *ar) { ucl_object_iter_t it = NULL; const ucl_object_t *cur; @@ -706,9 +708,9 @@ 
rspamd_ucl_array_find_str (const gchar *str, const ucl_object_t *ar) return FALSE; } - while ((cur = ucl_object_iterate (ar, &it, true)) != NULL) { - if (ucl_object_type (cur) == UCL_STRING && rspamd_strcase_equal ( - ucl_object_tostring (cur), str)) { + while ((cur = ucl_object_iterate(ar, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_STRING && rspamd_strcase_equal( + ucl_object_tostring(cur), str)) { return TRUE; } } @@ -717,72 +719,75 @@ rspamd_ucl_array_find_str (const gchar *str, const ucl_object_t *ar) } static void -rspamd_language_detector_process_chain (struct rspamd_config *cfg, - struct rspamd_ngramm_chain *chain) +rspamd_language_detector_process_chain(struct rspamd_config *cfg, + struct rspamd_ngramm_chain *chain) { struct rspamd_ngramm_elt *elt; guint i; gdouble delta, mean = 0, delta2, m2 = 0, std; if (chain->languages->len > 3) { - PTR_ARRAY_FOREACH (chain->languages, i, elt) { + PTR_ARRAY_FOREACH(chain->languages, i, elt) + { delta = elt->prob - mean; mean += delta / (i + 1); delta2 = elt->prob - mean; m2 += delta * delta2; } - std = sqrt (m2 / (i - 1)); + std = sqrt(m2 / (i - 1)); chain->mean = mean; chain->std = std; /* Now, filter elements that are lower than mean */ - PTR_ARRAY_FOREACH (chain->languages, i, elt) { + PTR_ARRAY_FOREACH(chain->languages, i, elt) + { if (elt->prob < mean) { - g_ptr_array_remove_index_fast (chain->languages, i); + g_ptr_array_remove_index_fast(chain->languages, i); #ifdef EXTRA_LANGDET_DEBUG - msg_debug_lang_det_cfg ("remove %s from %s; prob: %.4f; mean: %.4f, std: %.4f", - elt->elt->name, chain->utf, elt->prob, mean, std); + msg_debug_lang_det_cfg("remove %s from %s; prob: %.4f; mean: %.4f, std: %.4f", + elt->elt->name, chain->utf, elt->prob, mean, std); #endif } } } else { /* We have a unique ngramm, increase its weight */ - PTR_ARRAY_FOREACH (chain->languages, i, elt) { + PTR_ARRAY_FOREACH(chain->languages, i, elt) + { elt->prob *= 4.0; #ifdef EXTRA_LANGDET_DEBUG - msg_debug_lang_det_cfg ("increase 
weight of %s in %s; prob: %.4f", - elt->elt->name, chain->utf, elt->prob); + msg_debug_lang_det_cfg("increase weight of %s in %s; prob: %.4f", + elt->elt->name, chain->utf, elt->prob); #endif } } } static void -rspamd_language_detector_dtor (struct rspamd_lang_detector *d) +rspamd_language_detector_dtor(struct rspamd_lang_detector *d) { if (d) { - for (guint i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) { - kh_destroy (rspamd_trigram_hash, d->trigrams[i]); - rspamd_multipattern_destroy (d->stop_words[i].mp); - g_array_free (d->stop_words[i].ranges, TRUE); + for (guint i = 0; i < RSPAMD_LANGUAGE_MAX; i++) { + kh_destroy(rspamd_trigram_hash, d->trigrams[i]); + rspamd_multipattern_destroy(d->stop_words[i].mp); + g_array_free(d->stop_words[i].ranges, TRUE); } if (d->languages) { - kh_destroy (rspamd_languages_hash, d->languages); + kh_destroy(rspamd_languages_hash, d->languages); } - kh_destroy (rspamd_stopwords_hash, d->stop_words_norm); + kh_destroy(rspamd_stopwords_hash, d->stop_words_norm); rspamd_lang_detection_fasttext_destroy(d->fasttext_detector); } } -struct rspamd_lang_detector* -rspamd_language_detector_init (struct rspamd_config *cfg) +struct rspamd_lang_detector * +rspamd_language_detector_init(struct rspamd_config *cfg) { const ucl_object_t *section, *elt, *languages_enable = NULL, - *languages_disable = NULL; + *languages_disable = NULL; const gchar *languages_path = default_languages_path; glob_t gl; size_t i, short_text_limit = default_short_text_limit, total = 0; @@ -795,153 +800,153 @@ rspamd_language_detector_init (struct rspamd_config *cfg) ucl_object_t *stop_words; bool prefer_fasttext = true; - section = ucl_object_lookup (cfg->rcl_obj, "lang_detection"); + section = ucl_object_lookup(cfg->rcl_obj, "lang_detection"); if (section != NULL) { - elt = ucl_object_lookup (section, "languages"); + elt = ucl_object_lookup(section, "languages"); if (elt) { - languages_path = ucl_object_tostring (elt); + languages_path = ucl_object_tostring(elt); } - elt = 
ucl_object_lookup (section, "short_text_limit"); + elt = ucl_object_lookup(section, "short_text_limit"); if (elt) { - short_text_limit = ucl_object_toint (elt); + short_text_limit = ucl_object_toint(elt); } - languages_enable = ucl_object_lookup (section, "languages_enable"); - languages_disable = ucl_object_lookup (section, "languages_disable"); + languages_enable = ucl_object_lookup(section, "languages_enable"); + languages_disable = ucl_object_lookup(section, "languages_disable"); elt = ucl_object_lookup(section, "prefer_fasttext"); if (elt) { - prefer_fasttext = ucl_object_toboolean (elt); + prefer_fasttext = ucl_object_toboolean(elt); } } - languages_pattern = g_string_sized_new (PATH_MAX); - rspamd_printf_gstring (languages_pattern, "%s/stop_words", languages_path); - parser = ucl_parser_new (UCL_PARSER_DEFAULT); + languages_pattern = g_string_sized_new(PATH_MAX); + rspamd_printf_gstring(languages_pattern, "%s/stop_words", languages_path); + parser = ucl_parser_new(UCL_PARSER_DEFAULT); - if (ucl_parser_add_file (parser, languages_pattern->str)) { - stop_words = ucl_parser_get_object (parser); + if (ucl_parser_add_file(parser, languages_pattern->str)) { + stop_words = ucl_parser_get_object(parser); } else { - msg_err_config ("cannot read stop words from %s: %s", - languages_pattern->str, - ucl_parser_get_error (parser)); + msg_err_config("cannot read stop words from %s: %s", + languages_pattern->str, + ucl_parser_get_error(parser)); stop_words = NULL; } - ucl_parser_free (parser); + ucl_parser_free(parser); languages_pattern->len = 0; - rspamd_printf_gstring (languages_pattern, "%s/*.json", languages_path); - memset (&gl, 0, sizeof (gl)); + rspamd_printf_gstring(languages_pattern, "%s/*.json", languages_path); + memset(&gl, 0, sizeof(gl)); - if (glob (languages_pattern->str, 0, NULL, &gl) != 0) { - msg_err_config ("cannot read any files matching %v", languages_pattern); + if (glob(languages_pattern->str, 0, NULL, &gl) != 0) { + msg_err_config("cannot read any 
files matching %v", languages_pattern); goto end; } - ret = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*ret)); + ret = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(*ret)); ret->languages = kh_init(rspamd_languages_hash); kh_resize(rspamd_languages_hash, ret->languages, gl.gl_pathc); - ret->uchar_converter = rspamd_get_utf8_converter (); + ret->uchar_converter = rspamd_get_utf8_converter(); ret->short_text_limit = short_text_limit; - ret->stop_words_norm = kh_init (rspamd_stopwords_hash); + ret->stop_words_norm = kh_init(rspamd_stopwords_hash); ret->prefer_fasttext = prefer_fasttext; /* Map from ngramm in ucs32 to GPtrArray of rspamd_language_elt */ - for (i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) { - ret->trigrams[i] = kh_init (rspamd_trigram_hash); + for (i = 0; i < RSPAMD_LANGUAGE_MAX; i++) { + ret->trigrams[i] = kh_init(rspamd_trigram_hash); #ifdef WITH_HYPERSCAN - ret->stop_words[i].mp = rspamd_multipattern_create ( - RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8| - RSPAMD_MULTIPATTERN_RE); + ret->stop_words[i].mp = rspamd_multipattern_create( + RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 | + RSPAMD_MULTIPATTERN_RE); #else - ret->stop_words[i].mp = rspamd_multipattern_create ( - RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8); + ret->stop_words[i].mp = rspamd_multipattern_create( + RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8); #endif - ret->stop_words[i].ranges = g_array_new (FALSE, FALSE, - sizeof (struct rspamd_stop_word_range)); + ret->stop_words[i].ranges = g_array_new(FALSE, FALSE, + sizeof(struct rspamd_stop_word_range)); } - g_assert (uc_err == U_ZERO_ERROR); + g_assert(uc_err == U_ZERO_ERROR); - for (i = 0; i < gl.gl_pathc; i ++) { - fname = g_path_get_basename (gl.gl_pathv[i]); + for (i = 0; i < gl.gl_pathc; i++) { + fname = g_path_get_basename(gl.gl_pathv[i]); - if (!rspamd_ucl_array_find_str (fname, languages_disable) || - (languages_enable == NULL || - rspamd_ucl_array_find_str (fname, languages_enable))) { - 
rspamd_language_detector_read_file (cfg, ret, gl.gl_pathv[i], - stop_words); + if (!rspamd_ucl_array_find_str(fname, languages_disable) || + (languages_enable == NULL || + rspamd_ucl_array_find_str(fname, languages_enable))) { + rspamd_language_detector_read_file(cfg, ret, gl.gl_pathv[i], + stop_words); } else { - msg_info_config ("skip language file %s: disabled", fname); + msg_info_config("skip language file %s: disabled", fname); } - g_free (fname); + g_free(fname); } - for (i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) { + for (i = 0; i < RSPAMD_LANGUAGE_MAX; i++) { GError *err = NULL; - kh_foreach_value (ret->trigrams[i], schain, { + kh_foreach_value(ret->trigrams[i], schain, { chain = &schain; - rspamd_language_detector_process_chain (cfg, chain); + rspamd_language_detector_process_chain(cfg, chain); }); - if (!rspamd_multipattern_compile (ret->stop_words[i].mp, &err)) { - msg_err_config ("cannot compile stop words for %z language group: %e", - i, err); - g_error_free (err); + if (!rspamd_multipattern_compile(ret->stop_words[i].mp, &err)) { + msg_err_config("cannot compile stop words for %z language group: %e", + i, err); + g_error_free(err); } - total += kh_size (ret->trigrams[i]); + total += kh_size(ret->trigrams[i]); } ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg); char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector); - msg_info_config ("loaded %d languages, " - "%d trigrams; %s", - (gint)kh_size(ret->languages), - (gint)total, fasttext_status); - g_free (fasttext_status); + msg_info_config("loaded %d languages, " + "%d trigrams; %s", + (gint) kh_size(ret->languages), + (gint) total, fasttext_status); + g_free(fasttext_status); if (stop_words) { - ucl_object_unref (stop_words); + ucl_object_unref(stop_words); } - REF_INIT_RETAIN (ret, rspamd_language_detector_dtor); - rspamd_mempool_add_destructor (cfg->cfg_pool, - (rspamd_mempool_destruct_t)rspamd_language_detector_unref, - ret); + REF_INIT_RETAIN(ret, 
rspamd_language_detector_dtor); + rspamd_mempool_add_destructor(cfg->cfg_pool, + (rspamd_mempool_destruct_t) rspamd_language_detector_unref, + ret); end: if (gl.gl_pathc > 0) { - globfree (&gl); + globfree(&gl); } - g_string_free (languages_pattern, TRUE); + g_string_free(languages_pattern, TRUE); return ret; } static void -rspamd_language_detector_random_select (GArray *ucs_tokens, guint nwords, - goffset *offsets_out) +rspamd_language_detector_random_select(GArray *ucs_tokens, guint nwords, + goffset *offsets_out) { guint step_len, remainder, i, out_idx; guint64 coin, sel; rspamd_stat_token_t *tok; - g_assert (nwords != 0); - g_assert (offsets_out != NULL); - g_assert (ucs_tokens->len >= nwords); + g_assert(nwords != 0); + g_assert(offsets_out != NULL); + g_assert(ucs_tokens->len >= nwords); /* * We split input array into `nwords` parts. For each part we randomly select * an element from this particular split. Here is an example: @@ -962,30 +967,30 @@ rspamd_language_detector_random_select (GArray *ucs_tokens, guint nwords, remainder = ucs_tokens->len % nwords; out_idx = 0; - coin = rspamd_random_uint64_fast (); + coin = rspamd_random_uint64_fast(); sel = coin % (step_len + remainder); offsets_out[out_idx] = sel; for (i = step_len + remainder; i < ucs_tokens->len; - i += step_len, out_idx ++) { + i += step_len, out_idx++) { guint ntries = 0; - coin = rspamd_random_uint64_fast (); + coin = rspamd_random_uint64_fast(); sel = (coin % step_len) + i; for (;;) { - tok = &g_array_index (ucs_tokens, rspamd_stat_token_t, sel); + tok = &g_array_index(ucs_tokens, rspamd_stat_token_t, sel); /* Filter bad tokens */ if (tok->unicode.len >= 2 && - !(tok->flags & RSPAMD_STAT_TOKEN_FLAG_EXCEPTION) && - u_isalpha (tok->unicode.begin[0]) && - u_isalpha (tok->unicode.begin[tok->unicode.len - 1])) { + !(tok->flags & RSPAMD_STAT_TOKEN_FLAG_EXCEPTION) && + u_isalpha(tok->unicode.begin[0]) && + u_isalpha(tok->unicode.begin[tok->unicode.len - 1])) { offsets_out[out_idx] = sel; break; } 
else { - ntries ++; - coin = rspamd_random_uint64_fast (); + ntries++; + coin = rspamd_random_uint64_fast(); if (ntries < step_len) { sel = (coin % step_len) + i; @@ -1022,8 +1027,8 @@ rspamd_language_detector_random_select (GArray *ucs_tokens, guint nwords, } static goffset -rspamd_language_detector_next_ngramm (rspamd_stat_token_t *tok, UChar32 *window, - guint wlen, goffset cur_off) +rspamd_language_detector_next_ngramm(rspamd_stat_token_t *tok, UChar32 *window, + guint wlen, goffset cur_off) { guint i; @@ -1031,18 +1036,18 @@ rspamd_language_detector_next_ngramm (rspamd_stat_token_t *tok, UChar32 *window, /* Deal with spaces at the beginning and ending */ if (cur_off == 0) { - window[0] = (UChar32)' '; + window[0] = (UChar32) ' '; - for (i = 0; i < wlen - 1; i ++) { + for (i = 0; i < wlen - 1; i++) { window[i + 1] = tok->unicode.begin[i]; } } else if (cur_off + wlen == tok->unicode.len + 1) { /* Add trailing space */ - for (i = 0; i < wlen - 1; i ++) { + for (i = 0; i < wlen - 1; i++) { window[i] = tok->unicode.begin[cur_off + i]; } - window[wlen - 1] = (UChar32)' '; + window[wlen - 1] = (UChar32) ' '; } else if (cur_off + wlen > tok->unicode.len + 1) { /* No more fun */ @@ -1070,11 +1075,11 @@ rspamd_language_detector_next_ngramm (rspamd_stat_token_t *tok, UChar32 *window, * Do full guess for a specific ngramm, checking all languages defined */ static void -rspamd_language_detector_process_ngramm_full (struct rspamd_task *task, - struct rspamd_lang_detector *d, - UChar32 *window, - khash_t(rspamd_candidates_hash) *candidates, - khash_t(rspamd_trigram_hash) *trigrams) +rspamd_language_detector_process_ngramm_full(struct rspamd_task *task, + struct rspamd_lang_detector *d, + UChar32 *window, + khash_t(rspamd_candidates_hash) * candidates, + khash_t(rspamd_trigram_hash) * trigrams) { guint i; gint ret; @@ -1084,41 +1089,43 @@ rspamd_language_detector_process_ngramm_full (struct rspamd_task *task, khiter_t k; gdouble prob; - k = kh_get (rspamd_trigram_hash, 
trigrams, window); - if (k != kh_end (trigrams)) { - chain = &kh_value (trigrams, k); + k = kh_get(rspamd_trigram_hash, trigrams, window); + if (k != kh_end(trigrams)) { + chain = &kh_value(trigrams, k); } if (chain) { - PTR_ARRAY_FOREACH (chain->languages, i, elt) { + PTR_ARRAY_FOREACH(chain->languages, i, elt) + { prob = elt->prob; if (prob < chain->mean) { continue; } - k = kh_get (rspamd_candidates_hash, candidates, elt->elt->name); - if (k != kh_end (candidates)) { - cand = kh_value (candidates, k); + k = kh_get(rspamd_candidates_hash, candidates, elt->elt->name); + if (k != kh_end(candidates)) { + cand = kh_value(candidates, k); } else { cand = NULL; } #ifdef NGRAMMS_DEBUG - msg_err ("gramm: %s, lang: %s, prob: %.3f", chain->utf, - elt->elt->name, log2 (elt->prob)); + msg_err("gramm: %s, lang: %s, prob: %.3f", chain->utf, + elt->elt->name, log2(elt->prob)); #endif if (cand == NULL) { - cand = rspamd_mempool_alloc (task->task_pool, sizeof (*cand)); + cand = rspamd_mempool_alloc(task->task_pool, sizeof(*cand)); cand->elt = elt->elt; cand->lang = elt->elt->name; cand->prob = prob; - k = kh_put (rspamd_candidates_hash, candidates, elt->elt->name, - &ret); - kh_value (candidates, k) = cand; - } else { + k = kh_put(rspamd_candidates_hash, candidates, elt->elt->name, + &ret); + kh_value(candidates, k) = cand; + } + else { /* Update guess */ cand->prob += prob; } @@ -1127,21 +1134,20 @@ rspamd_language_detector_process_ngramm_full (struct rspamd_task *task, } static void -rspamd_language_detector_detect_word (struct rspamd_task *task, - struct rspamd_lang_detector *d, - rspamd_stat_token_t *tok, - khash_t(rspamd_candidates_hash) *candidates, - khash_t(rspamd_trigram_hash) *trigrams) +rspamd_language_detector_detect_word(struct rspamd_task *task, + struct rspamd_lang_detector *d, + rspamd_stat_token_t *tok, + khash_t(rspamd_candidates_hash) * candidates, + khash_t(rspamd_trigram_hash) * trigrams) { const guint wlen = 3; UChar32 window[3]; goffset cur = 0; /* Split 
words */ - while ((cur = rspamd_language_detector_next_ngramm (tok, window, wlen, cur)) - != -1) { - rspamd_language_detector_process_ngramm_full (task, - d, window, candidates, trigrams); + while ((cur = rspamd_language_detector_next_ngramm(tok, window, wlen, cur)) != -1) { + rspamd_language_detector_process_ngramm_full(task, + d, window, candidates, trigrams); } } @@ -1152,24 +1158,24 @@ static const gdouble cutoff_limit = -8.0; */ static inline void -rspamd_language_detector_filter_step1 (struct rspamd_task *task, - struct rspamd_lang_detector_res *cand, - gdouble *max_prob, guint *filtered) +rspamd_language_detector_filter_step1(struct rspamd_task *task, + struct rspamd_lang_detector_res *cand, + gdouble *max_prob, guint *filtered) { - if (!isnan (cand->prob)) { + if (!isnan(cand->prob)) { if (cand->prob == 0) { cand->prob = NAN; - msg_debug_lang_det ( - "exclude language %s", - cand->lang); + msg_debug_lang_det( + "exclude language %s", + cand->lang); (*filtered)++; } else { - cand->prob = log2 (cand->prob); + cand->prob = log2(cand->prob); if (cand->prob < cutoff_limit) { - msg_debug_lang_det ( - "exclude language %s: %.3f, cutoff limit: %.3f", - cand->lang, cand->prob, cutoff_limit); + msg_debug_lang_det( + "exclude language %s: %.3f, cutoff limit: %.3f", + cand->lang, cand->prob, cutoff_limit); cand->prob = NAN; (*filtered)++; } @@ -1181,76 +1187,76 @@ rspamd_language_detector_filter_step1 (struct rspamd_task *task, } static inline void -rspamd_language_detector_filter_step2 (struct rspamd_task *task, - struct rspamd_lang_detector_res *cand, - gdouble max_prob, guint *filtered) +rspamd_language_detector_filter_step2(struct rspamd_task *task, + struct rspamd_lang_detector_res *cand, + gdouble max_prob, guint *filtered) { /* * Probabilities are logarithmic, so if prob1 - prob2 > 4, it means that * prob2 is 2^4 less than prob1 */ - if (!isnan (cand->prob) && max_prob - cand->prob > 1) { - msg_debug_lang_det ("exclude language %s: %.3f (%.3f max)", - 
cand->lang, cand->prob, max_prob); + if (!isnan(cand->prob) && max_prob - cand->prob > 1) { + msg_debug_lang_det("exclude language %s: %.3f (%.3f max)", + cand->lang, cand->prob, max_prob); cand->prob = NAN; - (*filtered) ++; + (*filtered)++; } } static void -rspamd_language_detector_filter_negligible (struct rspamd_task *task, - khash_t(rspamd_candidates_hash) *candidates) +rspamd_language_detector_filter_negligible(struct rspamd_task *task, + khash_t(rspamd_candidates_hash) * candidates) { struct rspamd_lang_detector_res *cand; guint filtered = 0; gdouble max_prob = -(G_MAXDOUBLE); - kh_foreach_value (candidates, cand, - rspamd_language_detector_filter_step1 (task, cand, &max_prob, &filtered)); - kh_foreach_value (candidates, cand, - rspamd_language_detector_filter_step2 (task, cand, max_prob, &filtered)); + kh_foreach_value(candidates, cand, + rspamd_language_detector_filter_step1(task, cand, &max_prob, &filtered)); + kh_foreach_value(candidates, cand, + rspamd_language_detector_filter_step2(task, cand, max_prob, &filtered)); - msg_debug_lang_det ("removed %d languages", filtered); + msg_debug_lang_det("removed %d languages", filtered); } static void -rspamd_language_detector_detect_type (struct rspamd_task *task, - guint nwords, - struct rspamd_lang_detector *d, - GArray *words, - enum rspamd_language_category cat, - khash_t(rspamd_candidates_hash) *candidates) +rspamd_language_detector_detect_type(struct rspamd_task *task, + guint nwords, + struct rspamd_lang_detector *d, + GArray *words, + enum rspamd_language_category cat, + khash_t(rspamd_candidates_hash) * candidates) { - guint nparts = MIN (words->len, nwords); + guint nparts = MIN(words->len, nwords); goffset *selected_words; rspamd_stat_token_t *tok; guint i; - selected_words = g_new0 (goffset, nparts); - rspamd_language_detector_random_select (words, nparts, selected_words); - msg_debug_lang_det ("randomly selected %d words", nparts); + selected_words = g_new0(goffset, nparts); + 
rspamd_language_detector_random_select(words, nparts, selected_words); + msg_debug_lang_det("randomly selected %d words", nparts); for (i = 0; i < nparts; i++) { - tok = &g_array_index (words, rspamd_stat_token_t, - selected_words[i]); + tok = &g_array_index(words, rspamd_stat_token_t, + selected_words[i]); if (tok->unicode.len >= 3) { - rspamd_language_detector_detect_word (task, d, tok, candidates, - d->trigrams[cat]); + rspamd_language_detector_detect_word(task, d, tok, candidates, + d->trigrams[cat]); } } /* Filter negligible candidates */ - rspamd_language_detector_filter_negligible (task, candidates); - g_free (selected_words); + rspamd_language_detector_filter_negligible(task, candidates); + g_free(selected_words); } static gint -rspamd_language_detector_cmp (gconstpointer a, gconstpointer b) +rspamd_language_detector_cmp(gconstpointer a, gconstpointer b) { const struct rspamd_lang_detector_res - *canda = *(const struct rspamd_lang_detector_res **)a, - *candb = *(const struct rspamd_lang_detector_res **)b; + *canda = *(const struct rspamd_lang_detector_res **) a, + *candb = *(const struct rspamd_lang_detector_res **) b; if (canda->prob > candb->prob) { return -1; @@ -1269,26 +1275,26 @@ enum rspamd_language_detected_type { }; static enum rspamd_language_detected_type -rspamd_language_detector_try_ngramm (struct rspamd_task *task, - guint nwords, - struct rspamd_lang_detector *d, - GArray *ucs_tokens, - enum rspamd_language_category cat, - khash_t(rspamd_candidates_hash) *candidates) +rspamd_language_detector_try_ngramm(struct rspamd_task *task, + guint nwords, + struct rspamd_lang_detector *d, + GArray *ucs_tokens, + enum rspamd_language_category cat, + khash_t(rspamd_candidates_hash) * candidates) { guint cand_len = 0; struct rspamd_lang_detector_res *cand; - rspamd_language_detector_detect_type (task, - nwords, - d, - ucs_tokens, - cat, - candidates); + rspamd_language_detector_detect_type(task, + nwords, + d, + ucs_tokens, + cat, + candidates); - 
kh_foreach_value (candidates, cand, { - if (!isnan (cand->prob)) { - cand_len ++; + kh_foreach_value(candidates, cand, { + if (!isnan(cand->prob)) { + cand_len++; } }); @@ -1319,13 +1325,13 @@ static const gdouble tier1_adjustment = 0.8; static const gdouble frequency_adjustment = 0.8; static gint -rspamd_language_detector_cmp_heuristic (gconstpointer a, gconstpointer b, - gpointer ud) +rspamd_language_detector_cmp_heuristic(gconstpointer a, gconstpointer b, + gpointer ud) { struct rspamd_frequency_sort_cbdata *cbd = ud; const struct rspamd_lang_detector_res - *canda = *(const struct rspamd_lang_detector_res **)a, - *candb = *(const struct rspamd_lang_detector_res **)b; + *canda = *(const struct rspamd_lang_detector_res **) a, + *candb = *(const struct rspamd_lang_detector_res **) b; gdouble adj; gdouble proba_adjusted, probb_adjusted, freqa, freqb; @@ -1333,15 +1339,15 @@ rspamd_language_detector_cmp_heuristic (gconstpointer a, gconstpointer b, return 0; } - freqa = ((gdouble)canda->elt->occurrences) / - (gdouble)cbd->d->total_occurrences; - freqb = ((gdouble)candb->elt->occurrences) / - (gdouble)cbd->d->total_occurrences; + freqa = ((gdouble) canda->elt->occurrences) / + (gdouble) cbd->d->total_occurrences; + freqb = ((gdouble) candb->elt->occurrences) / + (gdouble) cbd->d->total_occurrences; proba_adjusted = canda->prob; probb_adjusted = candb->prob; - if (isnormal (freqa) && isnormal (freqb)) { + if (isnormal(freqa) && isnormal(freqb)) { proba_adjusted += cbd->std * (frequency_adjustment * freqa); probb_adjusted += cbd->std * (frequency_adjustment * freqb); } @@ -1386,10 +1392,10 @@ rspamd_language_detector_cmp_heuristic (gconstpointer a, gconstpointer b, } static void -rspamd_language_detector_unicode_scripts (struct rspamd_task *task, - struct rspamd_mime_text_part *part, - guint *pchinese, - guint *pspecial) +rspamd_language_detector_unicode_scripts(struct rspamd_task *task, + struct rspamd_mime_text_part *part, + guint *pchinese, + guint *pspecial) { const 
gchar *p = part->utf_stripped_content->data, *end; guint i = 0, cnt = 0; @@ -1399,33 +1405,33 @@ rspamd_language_detector_unicode_scripts (struct rspamd_task *task, const guint cutoff_limit = 32; while (p + i < end) { - U8_NEXT (p, i, part->utf_stripped_content->len, uc); + U8_NEXT(p, i, part->utf_stripped_content->len, uc); if (((gint32) uc) < 0) { break; } - if (u_isalpha (uc)) { - sc = ublock_getCode (uc); - cnt ++; + if (u_isalpha(uc)) { + sc = ublock_getCode(uc); + cnt++; switch (sc) { case UBLOCK_BASIC_LATIN: case UBLOCK_LATIN_1_SUPPLEMENT: part->unicode_scripts |= RSPAMD_UNICODE_LATIN; - nlatin ++; + nlatin++; break; case UBLOCK_HEBREW: part->unicode_scripts |= RSPAMD_UNICODE_HEBREW; - nspecial ++; + nspecial++; break; case UBLOCK_GREEK: part->unicode_scripts |= RSPAMD_UNICODE_GREEK; - nspecial ++; + nspecial++; break; case UBLOCK_CYRILLIC: part->unicode_scripts |= RSPAMD_UNICODE_CYRILLIC; - nspecial ++; + nspecial++; break; case UBLOCK_CJK_UNIFIED_IDEOGRAPHS: case UBLOCK_CJK_COMPATIBILITY: @@ -1433,57 +1439,57 @@ rspamd_language_detector_unicode_scripts (struct rspamd_task *task, case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B: part->unicode_scripts |= RSPAMD_UNICODE_CJK; - nchinese ++; + nchinese++; break; case UBLOCK_HIRAGANA: case UBLOCK_KATAKANA: part->unicode_scripts |= RSPAMD_UNICODE_JP; - nspecial ++; + nspecial++; break; case UBLOCK_HANGUL_JAMO: case UBLOCK_HANGUL_COMPATIBILITY_JAMO: part->unicode_scripts |= RSPAMD_UNICODE_HANGUL; - nspecial ++; + nspecial++; break; case UBLOCK_ARABIC: part->unicode_scripts |= RSPAMD_UNICODE_ARABIC; - nspecial ++; + nspecial++; break; case UBLOCK_DEVANAGARI: part->unicode_scripts |= RSPAMD_UNICODE_DEVANAGARI; - nspecial ++; + nspecial++; break; case UBLOCK_ARMENIAN: part->unicode_scripts |= RSPAMD_UNICODE_ARMENIAN; - nspecial ++; + nspecial++; break; case UBLOCK_GEORGIAN: part->unicode_scripts |= RSPAMD_UNICODE_GEORGIAN; - nspecial ++; + nspecial++; break; case 
UBLOCK_GUJARATI: part->unicode_scripts |= RSPAMD_UNICODE_GUJARATI; - nspecial ++; + nspecial++; break; case UBLOCK_TELUGU: part->unicode_scripts |= RSPAMD_UNICODE_TELUGU; - nspecial ++; + nspecial++; break; case UBLOCK_TAMIL: part->unicode_scripts |= RSPAMD_UNICODE_TAMIL; - nspecial ++; + nspecial++; break; case UBLOCK_THAI: part->unicode_scripts |= RSPAMD_UNICODE_THAI; - nspecial ++; + nspecial++; break; case RSPAMD_UNICODE_MALAYALAM: part->unicode_scripts |= RSPAMD_UNICODE_MALAYALAM; - nspecial ++; + nspecial++; break; case RSPAMD_UNICODE_SINHALA: part->unicode_scripts |= RSPAMD_UNICODE_SINHALA; - nspecial ++; + nspecial++; break; } } @@ -1499,51 +1505,51 @@ rspamd_language_detector_unicode_scripts (struct rspamd_task *task, } } - msg_debug_lang_det ("stop after checking %d characters, " - "%d latin, %d special, %d chinese", - cnt, nlatin, nspecial, nchinese); + msg_debug_lang_det("stop after checking %d characters, " + "%d latin, %d special, %d chinese", + cnt, nlatin, nspecial, nchinese); *pchinese = nchinese; *pspecial = nspecial; } static inline void -rspamd_language_detector_set_language (struct rspamd_task *task, - struct rspamd_mime_text_part *part, - const gchar *code, - struct rspamd_language_elt *elt) +rspamd_language_detector_set_language(struct rspamd_task *task, + struct rspamd_mime_text_part *part, + const gchar *code, + struct rspamd_language_elt *elt) { struct rspamd_lang_detector_res *r; - r = rspamd_mempool_alloc0 (task->task_pool, sizeof (*r)); + r = rspamd_mempool_alloc0(task->task_pool, sizeof(*r)); r->prob = 1.0; r->lang = code; r->elt = elt; if (part->languages == NULL) { - part->languages = g_ptr_array_sized_new (1); + part->languages = g_ptr_array_sized_new(1); } - g_ptr_array_add (part->languages, r); + g_ptr_array_add(part->languages, r); part->language = code; } static gboolean -rspamd_language_detector_try_uniscript (struct rspamd_task *task, - struct rspamd_mime_text_part *part, - guint nchinese, - guint nspecial) 
+rspamd_language_detector_try_uniscript(struct rspamd_task *task, + struct rspamd_mime_text_part *part, + guint nchinese, + guint nspecial) { guint i; - for (i = 0; i < G_N_ELEMENTS (unicode_langs); i ++) { + for (i = 0; i < G_N_ELEMENTS(unicode_langs); i++) { if (unicode_langs[i].unicode_code & part->unicode_scripts) { if (unicode_langs[i].unicode_code != RSPAMD_UNICODE_JP) { - msg_debug_lang_det ("set language based on unicode script %s", - unicode_langs[i].lang); - rspamd_language_detector_set_language (task, part, - unicode_langs[i].lang, NULL); + msg_debug_lang_det("set language based on unicode script %s", + unicode_langs[i].lang); + rspamd_language_detector_set_language(task, part, + unicode_langs[i].lang, NULL); return TRUE; } @@ -1558,10 +1564,10 @@ rspamd_language_detector_try_uniscript (struct rspamd_task *task, * it Chinese */ if (nchinese <= 5 || nchinese < nspecial * 5) { - msg_debug_lang_det ("set language based on unicode script %s", - unicode_langs[i].lang); - rspamd_language_detector_set_language (task, part, - unicode_langs[i].lang, NULL); + msg_debug_lang_det("set language based on unicode script %s", + unicode_langs[i].lang); + rspamd_language_detector_set_language(task, part, + unicode_langs[i].lang, NULL); return TRUE; } @@ -1570,10 +1576,10 @@ rspamd_language_detector_try_uniscript (struct rspamd_task *task, } if (part->unicode_scripts & RSPAMD_UNICODE_CJK) { - msg_debug_lang_det ("guess chinese based on CJK characters: %d chinese, %d special", - nchinese, nspecial); - rspamd_language_detector_set_language (task, part, - "zh-CN", NULL); + msg_debug_lang_det("guess chinese based on CJK characters: %d chinese, %d special", + nchinese, nspecial); + rspamd_language_detector_set_language(task, part, + "zh-CN", NULL); return TRUE; } @@ -1582,38 +1588,38 @@ rspamd_language_detector_try_uniscript (struct rspamd_task *task, } static guint -rspamd_langelt_hash_func (gconstpointer key) +rspamd_langelt_hash_func(gconstpointer key) { - const struct 
rspamd_language_elt *elt = (const struct rspamd_language_elt *)key; - return rspamd_cryptobox_fast_hash (elt->name, strlen (elt->name), - rspamd_hash_seed ()); + const struct rspamd_language_elt *elt = (const struct rspamd_language_elt *) key; + return rspamd_cryptobox_fast_hash(elt->name, strlen(elt->name), + rspamd_hash_seed()); } static gboolean -rspamd_langelt_equal_func (gconstpointer v, gconstpointer v2) +rspamd_langelt_equal_func(gconstpointer v, gconstpointer v2) { - const struct rspamd_language_elt *elt1 = (const struct rspamd_language_elt *)v, - *elt2 = (const struct rspamd_language_elt *)v2; - return strcmp (elt1->name, elt2->name) == 0; + const struct rspamd_language_elt *elt1 = (const struct rspamd_language_elt *) v, + *elt2 = (const struct rspamd_language_elt *) v2; + return strcmp(elt1->name, elt2->name) == 0; } /* This hash set stores a word index in the language to avoid duplicate stop words */ -KHASH_INIT (rspamd_sw_res_set, int, char, 0, kh_int_hash_func, kh_int_hash_equal); +KHASH_INIT(rspamd_sw_res_set, int, char, 0, kh_int_hash_func, kh_int_hash_equal); -KHASH_INIT (rspamd_sw_hash, struct rspamd_language_elt *, khash_t(rspamd_sw_res_set) *, 1, - rspamd_langelt_hash_func, rspamd_langelt_equal_func); +KHASH_INIT(rspamd_sw_hash, struct rspamd_language_elt *, khash_t(rspamd_sw_res_set) *, 1, + rspamd_langelt_hash_func, rspamd_langelt_equal_func); struct rspamd_sw_cbdata { struct rspamd_task *task; - khash_t (rspamd_sw_hash) *res; + khash_t(rspamd_sw_hash) * res; GArray *ranges; }; static gint -rspamd_ranges_cmp (const void *k, const void *memb) +rspamd_ranges_cmp(const void *k, const void *memb) { - gint pos = GPOINTER_TO_INT (k); - const struct rspamd_stop_word_range *r = (struct rspamd_stop_word_range *)memb; + gint pos = GPOINTER_TO_INT(k); + const struct rspamd_stop_word_range *r = (struct rspamd_stop_word_range *) memb; if (pos >= r->start && pos < r->stop) { return 0; @@ -1626,18 +1632,18 @@ rspamd_ranges_cmp (const void *k, const void 
*memb) } static gint -rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp, - guint strnum, - gint match_start, - gint match_pos, - const gchar *text, - gsize len, - void *context) +rspamd_language_detector_sw_cb(struct rspamd_multipattern *mp, + guint strnum, + gint match_start, + gint match_pos, + const gchar *text, + gsize len, + void *context) { /* Check if boundary */ const gchar *prev = text, *next = text + len; struct rspamd_stop_word_range *r; - struct rspamd_sw_cbdata *cbdata = (struct rspamd_sw_cbdata *)context; + struct rspamd_sw_cbdata *cbdata = (struct rspamd_sw_cbdata *) context; khiter_t k; static const gsize max_stop_words = 80; struct rspamd_task *task; @@ -1645,7 +1651,7 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp, if (match_start > 0) { prev = text + match_start - 1; - if (!(g_ascii_isspace (*prev) || g_ascii_ispunct (*prev))) { + if (!(g_ascii_isspace(*prev) || g_ascii_ispunct(*prev))) { return 0; } } @@ -1653,22 +1659,22 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp, if (match_pos < len) { next = text + match_pos; - if (!(g_ascii_isspace (*next) || g_ascii_ispunct (*next))) { + if (!(g_ascii_isspace(*next) || g_ascii_ispunct(*next))) { return 0; } } /* We have a word on the boundary, check range */ task = cbdata->task; - r = bsearch (GINT_TO_POINTER (strnum), cbdata->ranges->data, - cbdata->ranges->len, sizeof (*r), rspamd_ranges_cmp); + r = bsearch(GINT_TO_POINTER(strnum), cbdata->ranges->data, + cbdata->ranges->len, sizeof(*r), rspamd_ranges_cmp); - g_assert (r != NULL); + g_assert(r != NULL); - k = kh_get (rspamd_sw_hash, cbdata->res, r->elt); + k = kh_get(rspamd_sw_hash, cbdata->res, r->elt); gint nwords = 1; - if (k != kh_end (cbdata->res)) { + if (k != kh_end(cbdata->res)) { khiter_t set_k; int tt; @@ -1678,8 +1684,8 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp, if (set_k == kh_end(kh_value(cbdata->res, k))) { /* New word */ set_k = kh_put(rspamd_sw_res_set, 
kh_value(cbdata->res, k), strnum, &tt); - msg_debug_lang_det ("found new word %*s from %s language (%d stop words found so far)", - (int)(next - prev - 1), prev + 1, r->elt->name, nwords); + msg_debug_lang_det("found new word %*s from %s language (%d stop words found so far)", + (int) (next - prev - 1), prev + 1, r->elt->name, nwords); } if (nwords > max_stop_words) { @@ -1689,46 +1695,46 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp, else { gint tt; - k = kh_put (rspamd_sw_hash, cbdata->res, r->elt, &tt); + k = kh_put(rspamd_sw_hash, cbdata->res, r->elt, &tt); kh_value(cbdata->res, k) = kh_init(rspamd_sw_res_set); kh_put(rspamd_sw_res_set, kh_value(cbdata->res, k), strnum, &tt); - msg_debug_lang_det ("found new word %*s from %s language (%d stop words found so far)", - (int)(next - prev - 1), prev + 1, r->elt->name, nwords); + msg_debug_lang_det("found new word %*s from %s language (%d stop words found so far)", + (int) (next - prev - 1), prev + 1, r->elt->name, nwords); } return 0; } static gboolean -rspamd_language_detector_try_stop_words (struct rspamd_task *task, - struct rspamd_lang_detector *d, - struct rspamd_mime_text_part *part, - enum rspamd_language_category cat) +rspamd_language_detector_try_stop_words(struct rspamd_task *task, + struct rspamd_lang_detector *d, + struct rspamd_mime_text_part *part, + enum rspamd_language_category cat) { struct rspamd_stop_word_elt *elt; struct rspamd_sw_cbdata cbdata; gboolean ret = FALSE; static const int stop_words_threshold = 4, /* minimum stop words count */ - strong_confidence_threshold = 10 /* we are sure that this is enough */; + strong_confidence_threshold = 10 /* we are sure that this is enough */; elt = &d->stop_words[cat]; - cbdata.res = kh_init (rspamd_sw_hash); + cbdata.res = kh_init(rspamd_sw_hash); cbdata.ranges = elt->ranges; cbdata.task = task; - rspamd_multipattern_lookup (elt->mp, part->utf_stripped_content->data, - part->utf_stripped_content->len, rspamd_language_detector_sw_cb, 
- &cbdata, NULL); + rspamd_multipattern_lookup(elt->mp, part->utf_stripped_content->data, + part->utf_stripped_content->len, rspamd_language_detector_sw_cb, + &cbdata, NULL); - if (kh_size (cbdata.res) > 0) { - khash_t(rspamd_sw_res_set) *cur_res; + if (kh_size(cbdata.res) > 0) { + khash_t(rspamd_sw_res_set) * cur_res; double max_rate = G_MINDOUBLE; struct rspamd_language_elt *cur_lang, *sel = NULL; gboolean ignore_ascii = FALSE, ignore_latin = FALSE; - again: - kh_foreach (cbdata.res, cur_lang, cur_res, { + again: + kh_foreach(cbdata.res, cur_lang, cur_res, { int cur_matches = kh_size(cur_res); if (!ignore_ascii && (cur_lang->flags & RS_LANGUAGE_DIACRITICS)) { @@ -1736,8 +1742,8 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task, ignore_ascii = TRUE; sel = NULL; max_rate = G_MINDOUBLE; - msg_debug_lang_det ("ignore ascii after finding %d stop words from %s", - cur_matches, cur_lang->name); + msg_debug_lang_det("ignore ascii after finding %d stop words from %s", + cur_matches, cur_lang->name); goto again; } @@ -1746,8 +1752,8 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task, ignore_latin = TRUE; sel = NULL; max_rate = G_MINDOUBLE; - msg_debug_lang_det ("ignore latin after finding stop %d words from %s", - cur_matches, cur_lang->name); + msg_debug_lang_det("ignore latin after finding stop %d words from %s", + cur_matches, cur_lang->name); goto again; } @@ -1766,46 +1772,46 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task, } } - double rate = (double)cur_matches / (double)cur_lang->stop_words; + double rate = (double) cur_matches / (double) cur_lang->stop_words; if (rate > max_rate) { max_rate = rate; sel = cur_lang; } - msg_debug_lang_det ("found %d stop words from %s: %3f rate", - cur_matches, cur_lang->name, rate); + msg_debug_lang_det("found %d stop words from %s: %3f rate", + cur_matches, cur_lang->name, rate); }); /* Cleanup */ - kh_foreach (cbdata.res, cur_lang, cur_res, { - kh_destroy 
(rspamd_sw_res_set, cur_res); + kh_foreach(cbdata.res, cur_lang, cur_res, { + kh_destroy(rspamd_sw_res_set, cur_res); }); if (max_rate > 0 && sel) { - msg_debug_lang_det ("set language based on stop words script %s, %.3f found", - sel->name, max_rate); - rspamd_language_detector_set_language (task, part, - sel->name, sel); + msg_debug_lang_det("set language based on stop words script %s, %.3f found", + sel->name, max_rate); + rspamd_language_detector_set_language(task, part, + sel->name, sel); ret = TRUE; } } else { - msg_debug_lang_det ("found no stop words in a text"); + msg_debug_lang_det("found no stop words in a text"); } - kh_destroy (rspamd_sw_hash, cbdata.res); + kh_destroy(rspamd_sw_hash, cbdata.res); return ret; } gboolean -rspamd_language_detector_detect (struct rspamd_task *task, - struct rspamd_lang_detector *d, - struct rspamd_mime_text_part *part) +rspamd_language_detector_detect(struct rspamd_task *task, + struct rspamd_lang_detector *d, + struct rspamd_mime_text_part *part) { - khash_t(rspamd_candidates_hash) *candidates; + khash_t(rspamd_candidates_hash) * candidates; GPtrArray *result; gdouble mean, std, start_ticks, end_ticks; guint cand_len; @@ -1820,10 +1826,10 @@ rspamd_language_detector_detect (struct rspamd_task *task, return FALSE; } - start_ticks = rspamd_get_ticks (TRUE); + start_ticks = rspamd_get_ticks(TRUE); guint nchinese = 0, nspecial = 0; - rspamd_language_detector_unicode_scripts (task, part, &nchinese, &nspecial); + rspamd_language_detector_unicode_scripts(task, part, &nchinese, &nspecial); /* Disable internal language detection heuristics if we have fasttext */ if (!rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector) || !d->prefer_fasttext) { @@ -1844,24 +1850,24 @@ rspamd_language_detector_detect (struct rspamd_task *task, if (rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector)) { rspamd_fasttext_predict_result_t fasttext_predict_result = rspamd_lang_detection_fasttext_detect(d->fasttext_detector, task, 
- part->utf_words, 4); + part->utf_words, 4); ndetected = rspamd_lang_detection_fasttext_get_nlangs(fasttext_predict_result); if (ndetected > 0) { - candidates = kh_init (rspamd_candidates_hash); - kh_resize (rspamd_candidates_hash, candidates, ndetected); + candidates = kh_init(rspamd_candidates_hash); + kh_resize(rspamd_candidates_hash, candidates, ndetected); /* Now fill all results where probability is above threshold */ float max_prob = rspamd_lang_detection_fasttext_get_prob(fasttext_predict_result, 0); - for (unsigned int i = 0; i < ndetected; i ++) { + for (unsigned int i = 0; i < ndetected; i++) { float prob = rspamd_lang_detection_fasttext_get_prob(fasttext_predict_result, i); if (prob > max_prob * 0.75) { char *lang = rspamd_mempool_strdup(task->task_pool, - rspamd_lang_detection_fasttext_get_lang(fasttext_predict_result, i)); + rspamd_lang_detection_fasttext_get_lang(fasttext_predict_result, i)); int tmp; - khiter_t k = kh_put (rspamd_candidates_hash, candidates, lang, &tmp); + khiter_t k = kh_put(rspamd_candidates_hash, candidates, lang, &tmp); kh_value(candidates, k) = rspamd_mempool_alloc0(task->task_pool, sizeof(*cand)); cand = kh_value(candidates, k); @@ -1892,45 +1898,46 @@ rspamd_language_detector_detect (struct rspamd_task *task, if (ndetected == 0) { if (part->utf_words->len < default_short_text_limit) { r = rs_detect_none; - msg_debug_lang_det ("text is too short for trigrams detection: " - "%d words; at least %d words required", - (int)part->utf_words->len, - (int)default_short_text_limit); + msg_debug_lang_det("text is too short for trigrams detection: " + "%d words; at least %d words required", + (int) part->utf_words->len, + (int) default_short_text_limit); switch (cat) { case RSPAMD_LANGUAGE_CYRILLIC: - rspamd_language_detector_set_language (task, part, "ru", NULL); + rspamd_language_detector_set_language(task, part, "ru", NULL); break; case RSPAMD_LANGUAGE_DEVANAGARI: - rspamd_language_detector_set_language (task, part, "hi", NULL); + 
rspamd_language_detector_set_language(task, part, "hi", NULL); break; case RSPAMD_LANGUAGE_ARAB: - rspamd_language_detector_set_language (task, part, "ar", NULL); + rspamd_language_detector_set_language(task, part, "ar", NULL); break; default: case RSPAMD_LANGUAGE_LATIN: - rspamd_language_detector_set_language (task, part, "en", NULL); + rspamd_language_detector_set_language(task, part, "en", NULL); break; } - msg_debug_lang_det ("set %s language based on symbols category", - part->language); + msg_debug_lang_det("set %s language based on symbols category", + part->language); - candidates = kh_init (rspamd_candidates_hash); + candidates = kh_init(rspamd_candidates_hash); } else { - candidates = kh_init (rspamd_candidates_hash); - kh_resize (rspamd_candidates_hash, candidates, 32); + candidates = kh_init(rspamd_candidates_hash); + kh_resize(rspamd_candidates_hash, candidates, 32); - r = rspamd_language_detector_try_ngramm (task, - default_words, - d, - part->utf_words, - cat, - candidates); + r = rspamd_language_detector_try_ngramm(task, + default_words, + d, + part->utf_words, + cat, + candidates); if (r == rs_detect_none) { - msg_debug_lang_det ("no trigrams found, fallback to english"); - rspamd_language_detector_set_language (task, part, "en", NULL); - } else if (r == rs_detect_multiple) { + msg_debug_lang_det("no trigrams found, fallback to english"); + rspamd_language_detector_set_language(task, part, "en", NULL); + } + else if (r == rs_detect_multiple) { /* Check our guess */ mean = 0.0; @@ -1938,8 +1945,8 @@ rspamd_language_detector_detect (struct rspamd_task *task, cand_len = 0; /* Check distribution */ - kh_foreach_value (candidates, cand, { - if (!isnan (cand->prob)) { + kh_foreach_value(candidates, cand, { + if (!isnan(cand->prob)) { mean += cand->prob; cand_len++; } @@ -1948,22 +1955,22 @@ rspamd_language_detector_detect (struct rspamd_task *task, if (cand_len > 0) { mean /= cand_len; - kh_foreach_value (candidates, cand, { + 
kh_foreach_value(candidates, cand, { gdouble err; - if (!isnan (cand->prob)) { + if (!isnan(cand->prob)) { err = cand->prob - mean; - std += fabs (err); + std += fabs(err); } }); std /= cand_len; } - msg_debug_lang_det ("trigrams checked, %d candidates, %.3f mean, %.4f stddev", - cand_len, mean, std); + msg_debug_lang_det("trigrams checked, %d candidates, %.3f mean, %.4f stddev", + cand_len, mean, std); - if (cand_len > 0 && std / fabs (mean) < 0.25) { - msg_debug_lang_det ("apply frequency heuristic sorting"); + if (cand_len > 0 && std / fabs(mean) < 0.25) { + msg_debug_lang_det("apply frequency heuristic sorting"); frequency_heuristic_applied = TRUE; cbd.d = d; cbd.mean = mean; @@ -1979,26 +1986,27 @@ rspamd_language_detector_detect (struct rspamd_task *task, } /* Now, convert hash to array and sort it */ - if (r != rs_detect_none && kh_size (candidates) > 0) { - result = g_ptr_array_sized_new (kh_size (candidates)); - - kh_foreach_value (candidates, cand, { - if (!isnan (cand->prob)) { - msg_debug_lang_det ("final probability %s -> %.2f", cand->lang, - cand->prob); - g_ptr_array_add (result, cand); + if (r != rs_detect_none && kh_size(candidates) > 0) { + result = g_ptr_array_sized_new(kh_size(candidates)); + + kh_foreach_value(candidates, cand, { + if (!isnan(cand->prob)) { + msg_debug_lang_det("final probability %s -> %.2f", cand->lang, + cand->prob); + g_ptr_array_add(result, cand); } }); if (frequency_heuristic_applied) { - g_ptr_array_sort_with_data (result, - rspamd_language_detector_cmp_heuristic, (gpointer) &cbd); - } else { - g_ptr_array_sort (result, rspamd_language_detector_cmp); + g_ptr_array_sort_with_data(result, + rspamd_language_detector_cmp_heuristic, (gpointer) &cbd); + } + else { + g_ptr_array_sort(result, rspamd_language_detector_cmp); } if (result->len > 0 && !frequency_heuristic_applied) { - cand = g_ptr_array_index (result, 0); + cand = g_ptr_array_index(result, 0); if (cand->elt) { cand->elt->occurrences++; } @@ -2006,45 +2014,44 @@ 
rspamd_language_detector_detect (struct rspamd_task *task, } if (part->languages != NULL) { - g_ptr_array_unref (part->languages); + g_ptr_array_unref(part->languages); } part->languages = result; - part->language = ((struct rspamd_lang_detector_res *)g_ptr_array_index (result, 0))->lang; + part->language = ((struct rspamd_lang_detector_res *) g_ptr_array_index(result, 0))->lang; ret = TRUE; } else if (part->languages == NULL) { - rspamd_language_detector_set_language (task, part, "en", NULL); + rspamd_language_detector_set_language(task, part, "en", NULL); } - kh_destroy (rspamd_candidates_hash, candidates); + kh_destroy(rspamd_candidates_hash, candidates); } - end_ticks = rspamd_get_ticks (TRUE); - msg_debug_lang_det ("detected languages in %.0f ticks", - (end_ticks - start_ticks)); + end_ticks = rspamd_get_ticks(TRUE); + msg_debug_lang_det("detected languages in %.0f ticks", + (end_ticks - start_ticks)); return ret; } -struct rspamd_lang_detector* -rspamd_language_detector_ref (struct rspamd_lang_detector* d) +struct rspamd_lang_detector * +rspamd_language_detector_ref(struct rspamd_lang_detector *d) { - REF_RETAIN (d); + REF_RETAIN(d); return d; } -void -rspamd_language_detector_unref (struct rspamd_lang_detector* d) +void rspamd_language_detector_unref(struct rspamd_lang_detector *d) { - REF_RELEASE (d); + REF_RELEASE(d); } gboolean -rspamd_language_detector_is_stop_word (struct rspamd_lang_detector *d, - const gchar *word, gsize wlen) +rspamd_language_detector_is_stop_word(struct rspamd_lang_detector *d, + const gchar *word, gsize wlen) { khiter_t k; rspamd_ftok_t search; @@ -2052,17 +2059,16 @@ rspamd_language_detector_is_stop_word (struct rspamd_lang_detector *d, search.begin = word; search.len = wlen; - k = kh_get (rspamd_stopwords_hash, d->stop_words_norm, &search); + k = kh_get(rspamd_stopwords_hash, d->stop_words_norm, &search); - if (k != kh_end (d->stop_words_norm)) { + if (k != kh_end(d->stop_words_norm)) { return TRUE; } return FALSE; } -gint 
-rspamd_language_detector_elt_flags (const struct rspamd_language_elt *elt) +gint rspamd_language_detector_elt_flags(const struct rspamd_language_elt *elt) { if (elt) { return elt->flags; diff --git a/src/libmime/lang_detection.h b/src/libmime/lang_detection.h index 6c3234848..5423c13b7 100644 --- a/src/libmime/lang_detection.h +++ b/src/libmime/lang_detection.h @@ -22,7 +22,7 @@ #include "libstat/stat_api.h" #include "libmime/message.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -70,11 +70,11 @@ struct rspamd_lang_detector_res { * @param cfg * @return */ -struct rspamd_lang_detector *rspamd_language_detector_init (struct rspamd_config *cfg); +struct rspamd_lang_detector *rspamd_language_detector_init(struct rspamd_config *cfg); -struct rspamd_lang_detector *rspamd_language_detector_ref (struct rspamd_lang_detector *d); +struct rspamd_lang_detector *rspamd_language_detector_ref(struct rspamd_lang_detector *d); -void rspamd_language_detector_unref (struct rspamd_lang_detector *d); +void rspamd_language_detector_unref(struct rspamd_lang_detector *d); /** * Try to detect language of words @@ -83,9 +83,9 @@ void rspamd_language_detector_unref (struct rspamd_lang_detector *d); * @param words_len * @return array of struct rspamd_lang_detector_res sorted by freq descending */ -gboolean rspamd_language_detector_detect (struct rspamd_task *task, - struct rspamd_lang_detector *d, - struct rspamd_mime_text_part *part); +gboolean rspamd_language_detector_detect(struct rspamd_task *task, + struct rspamd_lang_detector *d, + struct rspamd_mime_text_part *part); /** * Returns TRUE if the specified word is known to be a stop word @@ -94,16 +94,16 @@ gboolean rspamd_language_detector_detect (struct rspamd_task *task, * @param wlen * @return */ -gboolean rspamd_language_detector_is_stop_word (struct rspamd_lang_detector *d, - const gchar *word, gsize wlen); +gboolean rspamd_language_detector_is_stop_word(struct rspamd_lang_detector *d, + const gchar *word, gsize 
wlen); /** * Return language flags for a specific language elt * @param elt * @return */ -gint rspamd_language_detector_elt_flags (const struct rspamd_language_elt *elt); -#ifdef __cplusplus +gint rspamd_language_detector_elt_flags(const struct rspamd_language_elt *elt); +#ifdef __cplusplus } #endif diff --git a/src/libmime/lang_detection_fasttext.cxx b/src/libmime/lang_detection_fasttext.cxx index d9e4e7192..f06e8ccb6 100644 --- a/src/libmime/lang_detection_fasttext.cxx +++ b/src/libmime/lang_detection_fasttext.cxx @@ -30,10 +30,10 @@ #ifdef WITH_FASTTEXT EXTERN_LOG_MODULE_DEF(langdet); -#define msg_debug_lang_det(...) rspamd_conditional_debug_fast (nullptr, nullptr, \ - rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \ - __FUNCTION__, \ - __VA_ARGS__) +#define msg_debug_lang_det(...) rspamd_conditional_debug_fast(nullptr, nullptr, \ + rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \ + __FUNCTION__, \ + __VA_ARGS__) namespace rspamd::langdet { class fasttext_langdet { @@ -43,7 +43,8 @@ private: bool loaded; public: - explicit fasttext_langdet(struct rspamd_config *cfg) { + explicit fasttext_langdet(struct rspamd_config *cfg) + { const auto *ucl_obj = cfg->rcl_obj; const auto *opts_section = ucl_object_find_key(ucl_obj, "lang_detection"); @@ -55,8 +56,7 @@ public: ft.loadModel(ucl_object_tostring(model)); loaded = true; model_fname = std::string{ucl_object_tostring(model)}; - } - catch (std::exception &e) { + } catch (std::exception &e) { auto err_message = fmt::format("cannot load fasttext model: {}", e.what()); msg_err_config("%s", err_message.c_str()); loaded = false; @@ -72,8 +72,12 @@ public: ~fasttext_langdet() = default; - auto is_enabled() const -> bool { return loaded; } - auto word2vec(const char *in, std::size_t len, std::vector<std::int32_t> &word_ngramms) const { + auto is_enabled() const -> bool + { + return loaded; + } + auto word2vec(const char *in, std::size_t len, std::vector<std::int32_t> &word_ngramms) const + { if 
(!loaded) { return; } @@ -114,23 +118,24 @@ public: ft.predict(k, words, line_predictions, 0.0f); const auto *dict = ft.getDictionary().get(); - for (const auto &pred : line_predictions) { + for (const auto &pred: line_predictions) { predictions->push_back(std::make_pair(std::exp(pred.first), dict->getLabel(pred.second))); } return predictions; } - auto model_info(void) const -> std::string { + auto model_info(void) const -> std::string + { if (!loaded) { return "fasttext model is not loaded"; } else { return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname, - ft.getDictionary()->nlabels(), ft.getDictionary()->ntokens()); + ft.getDictionary()->nlabels(), ft.getDictionary()->ntokens()); } } }; -} +}// namespace rspamd::langdet #endif /* C API part */ @@ -139,12 +144,12 @@ G_BEGIN_DECLS #define FASTTEXT_MODEL_TO_C_API(p) reinterpret_cast<rspamd::langdet::fasttext_langdet *>(p) #define FASTTEXT_RESULT_TO_C_API(res) reinterpret_cast<std::vector<std::pair<fasttext::real, std::string>> *>(res) -void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg) +void *rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg) { #ifndef WITH_FASTTEXT return nullptr; #else - return (void *)new rspamd::langdet::fasttext_langdet(cfg); + return (void *) new rspamd::langdet::fasttext_langdet(cfg); #endif } @@ -187,7 +192,7 @@ rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud, words_vec.reserve(utf_words->len); for (auto i = 0; i < std::min(utf_words->len, max_fasttext_input_len); i++) { - const auto *w = &g_array_index (utf_words, rspamd_stat_token_t, i); + const auto *w = &g_array_index(utf_words, rspamd_stat_token_t, i); if (w->original.len > 0) { real_model->word2vec(w->original.begin, w->original.len, words_vec); } @@ -197,7 +202,7 @@ rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud, auto *res = real_model->detect_language(words_vec, k); - return (rspamd_fasttext_predict_result_t)res; + 
return (rspamd_fasttext_predict_result_t) res; #endif } @@ -209,8 +214,7 @@ void rspamd_lang_detection_fasttext_destroy(void *ud) } -guint -rspamd_lang_detection_fasttext_get_nlangs(rspamd_fasttext_predict_result_t res) +guint rspamd_lang_detection_fasttext_get_nlangs(rspamd_fasttext_predict_result_t res) { #ifdef WITH_FASTTEXT auto *real_res = FASTTEXT_RESULT_TO_C_API(res); @@ -240,8 +244,7 @@ rspamd_lang_detection_fasttext_get_lang(rspamd_fasttext_predict_result_t res, un return nullptr; } -float -rspamd_lang_detection_fasttext_get_prob(rspamd_fasttext_predict_result_t res, unsigned int idx) +float rspamd_lang_detection_fasttext_get_prob(rspamd_fasttext_predict_result_t res, unsigned int idx) { #ifdef WITH_FASTTEXT auto *real_res = FASTTEXT_RESULT_TO_C_API(res); diff --git a/src/libmime/lang_detection_fasttext.h b/src/libmime/lang_detection_fasttext.h index 4a9f45c21..c8710d3c4 100644 --- a/src/libmime/lang_detection_fasttext.h +++ b/src/libmime/lang_detection_fasttext.h @@ -26,7 +26,7 @@ struct rspamd_task; /* for logging */ * @param cfg * @return opaque pointer */ -void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg); +void *rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg); /** * Check if fasttext language detector is enabled @@ -43,7 +43,7 @@ bool rspamd_lang_detection_fasttext_is_enabled(void *ud); char *rspamd_lang_detection_fasttext_show_info(void *ud); -typedef void * rspamd_fasttext_predict_result_t; +typedef void *rspamd_fasttext_predict_result_t; /** * Detect language using fasttext * @param ud opaque pointer @@ -53,7 +53,7 @@ typedef void * rspamd_fasttext_predict_result_t; * @return TRUE if language is detected */ rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud, - struct rspamd_task *task, GArray *utf_words, int k); + struct rspamd_task *task, GArray *utf_words, int k); /** * Get number of languages detected diff --git a/src/libmime/message.c b/src/libmime/message.c index 
508ea27ea..327b546e7 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -47,48 +47,48 @@ #define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_TEXT_PART_FLAG_UTF) static const gchar gtube_pattern_reject[] = "XJS*C4JDBQADN1.NSBN3*2IDNEN*" - "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; + "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; static const gchar gtube_pattern_add_header[] = "YJS*C4JDBQADN1.NSBN3*2IDNEN*" - "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; + "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; static const gchar gtube_pattern_rewrite_subject[] = "ZJS*C4JDBQADN1.NSBN3*2IDNEN*" - "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; + "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; static const gchar gtube_pattern_no_action[] = "AJS*C4JDBQADN1.NSBN3*2IDNEN*" - "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; + "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X"; struct rspamd_multipattern *gtube_matcher = NULL; static const guint64 words_hash_seed = 0xdeadbabe; static void -free_byte_array_callback (void *pointer) +free_byte_array_callback(void *pointer) { GByteArray *arr = (GByteArray *) pointer; - g_byte_array_free (arr, TRUE); + g_byte_array_free(arr, TRUE); } static void -rspamd_mime_part_extract_words (struct rspamd_task *task, - struct rspamd_mime_text_part *part) +rspamd_mime_part_extract_words(struct rspamd_task *task, + struct rspamd_mime_text_part *part) { rspamd_stat_token_t *w; guint i, total_len = 0, short_len = 0; if (part->utf_words) { - rspamd_stem_words (part->utf_words, task->task_pool, part->language, - task->lang_det); + rspamd_stem_words(part->utf_words, task->task_pool, part->language, + task->lang_det); for (i = 0; i < part->utf_words->len; i++) { guint64 h; - w = &g_array_index (part->utf_words, rspamd_stat_token_t, i); + w = &g_array_index(part->utf_words, rspamd_stat_token_t, i); if (w->stemmed.len > 0) { /* * We use static hash seed if we would want to use that in shingles * computation in future */ - h = rspamd_cryptobox_fast_hash_specific ( - 
RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, - w->stemmed.begin, w->stemmed.len, words_hash_seed); - g_array_append_val (part->normalized_hashes, h); + h = rspamd_cryptobox_fast_hash_specific( + RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT, + w->stemmed.begin, w->stemmed.len, words_hash_seed); + g_array_append_val(part->normalized_hashes, h); total_len += w->stemmed.len; if (w->stemmed.len <= 3) { @@ -97,13 +97,13 @@ rspamd_mime_part_extract_words (struct rspamd_task *task, if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT && !(w->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED)) { - part->nwords ++; + part->nwords++; } } - if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE| - RSPAMD_STAT_TOKEN_FLAG_NORMALISED| - RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES)) { + if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE | + RSPAMD_STAT_TOKEN_FLAG_NORMALISED | + RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES)) { task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE; } } @@ -111,29 +111,29 @@ rspamd_mime_part_extract_words (struct rspamd_task *task, if (part->utf_words->len) { gdouble *avg_len_p, *short_len_p; - avg_len_p = rspamd_mempool_get_variable (task->task_pool, - RSPAMD_MEMPOOL_AVG_WORDS_LEN); + avg_len_p = rspamd_mempool_get_variable(task->task_pool, + RSPAMD_MEMPOOL_AVG_WORDS_LEN); if (avg_len_p == NULL) { - avg_len_p = rspamd_mempool_alloc (task->task_pool, - sizeof (double)); + avg_len_p = rspamd_mempool_alloc(task->task_pool, + sizeof(double)); *avg_len_p = total_len; - rspamd_mempool_set_variable (task->task_pool, - RSPAMD_MEMPOOL_AVG_WORDS_LEN, avg_len_p, NULL); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_AVG_WORDS_LEN, avg_len_p, NULL); } else { *avg_len_p += total_len; } - short_len_p = rspamd_mempool_get_variable (task->task_pool, - RSPAMD_MEMPOOL_SHORT_WORDS_CNT); + short_len_p = rspamd_mempool_get_variable(task->task_pool, + RSPAMD_MEMPOOL_SHORT_WORDS_CNT); if (short_len_p == NULL) { - short_len_p = rspamd_mempool_alloc (task->task_pool, - sizeof (double)); + short_len_p = 
rspamd_mempool_alloc(task->task_pool, + sizeof(double)); *short_len_p = short_len; - rspamd_mempool_set_variable (task->task_pool, - RSPAMD_MEMPOOL_SHORT_WORDS_CNT, avg_len_p, NULL); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_SHORT_WORDS_CNT, avg_len_p, NULL); } else { *short_len_p += short_len; @@ -143,12 +143,12 @@ rspamd_mime_part_extract_words (struct rspamd_task *task, } static void -rspamd_mime_part_create_words (struct rspamd_task *task, - struct rspamd_mime_text_part *part) +rspamd_mime_part_create_words(struct rspamd_task *task, + struct rspamd_mime_text_part *part) { enum rspamd_tokenize_type tok_type; - if (IS_TEXT_PART_UTF (part)) { + if (IS_TEXT_PART_UTF(part)) { #if U_ICU_VERSION_MAJOR_NUM < 50 /* Hack to prevent hang with Thai in old libicu */ @@ -160,18 +160,18 @@ rspamd_mime_part_create_words (struct rspamd_task *task, tok_type = RSPAMD_TOKENIZE_UTF; while (p + i < end) { - U8_NEXT (p, i, part->utf_stripped_content->len, uc); + U8_NEXT(p, i, part->utf_stripped_content->len, uc); if (((gint32) uc) < 0) { tok_type = RSPAMD_TOKENIZE_RAW; break; } - if (u_isalpha (uc)) { - sc = ublock_getCode (uc); + if (u_isalpha(uc)) { + sc = ublock_getCode(uc); if (sc == UBLOCK_THAI) { - msg_info_task ("enable workaround for Thai characters for old libicu"); + msg_info_task("enable workaround for Thai characters for old libicu"); tok_type = RSPAMD_TOKENIZE_RAW; break; } @@ -185,38 +185,37 @@ rspamd_mime_part_create_words (struct rspamd_task *task, tok_type = RSPAMD_TOKENIZE_RAW; } - part->utf_words = rspamd_tokenize_text ( - part->utf_stripped_content->data, - part->utf_stripped_content->len, - &part->utf_stripped_text, - tok_type, task->cfg, - part->exceptions, - NULL, - NULL, - task->task_pool); + part->utf_words = rspamd_tokenize_text( + part->utf_stripped_content->data, + part->utf_stripped_content->len, + &part->utf_stripped_text, + tok_type, task->cfg, + part->exceptions, + NULL, + NULL, + task->task_pool); if (part->utf_words) { - 
part->normalized_hashes = g_array_sized_new (FALSE, FALSE, - sizeof (guint64), part->utf_words->len); - rspamd_normalize_words (part->utf_words, task->task_pool); + part->normalized_hashes = g_array_sized_new(FALSE, FALSE, + sizeof(guint64), part->utf_words->len); + rspamd_normalize_words(part->utf_words, task->task_pool); } - } static void -rspamd_mime_part_detect_language (struct rspamd_task *task, - struct rspamd_mime_text_part *part) +rspamd_mime_part_detect_language(struct rspamd_task *task, + struct rspamd_mime_text_part *part) { struct rspamd_lang_detector_res *lang; - if (!IS_TEXT_PART_EMPTY (part) && part->utf_words && part->utf_words->len > 0 && + if (!IS_TEXT_PART_EMPTY(part) && part->utf_words && part->utf_words->len > 0 && task->lang_det) { - if (rspamd_language_detector_detect (task, task->lang_det, part)) { - lang = g_ptr_array_index (part->languages, 0); + if (rspamd_language_detector_detect(task, task->lang_det, part)) { + lang = g_ptr_array_index(part->languages, 0); part->language = lang->lang; - msg_info_task ("detected part language: %s", part->language); + msg_info_task("detected part language: %s", part->language); } else { part->language = "en"; /* Safe fallback */ @@ -225,12 +224,12 @@ rspamd_mime_part_detect_language (struct rspamd_task *task, } static void -rspamd_strip_newlines_parse (struct rspamd_task *task, - const gchar *begin, const gchar *pe, - struct rspamd_mime_text_part *part) +rspamd_strip_newlines_parse(struct rspamd_task *task, + const gchar *begin, const gchar *pe, + struct rspamd_mime_text_part *part) { const gchar *p = begin, *c = begin; - gboolean crlf_added = FALSE, is_utf = IS_TEXT_PART_UTF (part); + gboolean crlf_added = FALSE, is_utf = IS_TEXT_PART_UTF(part); gboolean url_open_bracket = FALSE; UChar32 uc; @@ -243,29 +242,29 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, while (p < pe) { if (U8_IS_LEAD(*p) && is_utf) { gint32 off = p - begin; - U8_NEXT (begin, off, pe - begin, uc); + U8_NEXT(begin, off, pe - 
begin, uc); if (uc != -1) { while (p < pe && off < (pe - begin)) { - if (IS_ZERO_WIDTH_SPACE (uc)) { + if (IS_ZERO_WIDTH_SPACE(uc)) { /* Invisible space ! */ task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE; - part->spaces ++; + part->spaces++; if (p > c) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *) c, p - c); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) c, p - c); c = begin + off; p = c; } - U8_NEXT (begin, off, pe - begin, uc); + U8_NEXT(begin, off, pe - begin, uc); - if (!IS_ZERO_WIDTH_SPACE (uc)) { + if (!IS_ZERO_WIDTH_SPACE(uc)) { break; } - part->double_spaces ++; + part->double_spaces++; p = begin + off; c = p; } @@ -276,7 +275,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, } } - if (G_UNLIKELY (p >= pe)) { + if (G_UNLIKELY(p >= pe)) { /* * This is reached when there is a utf8 part and we * have zero width spaces at the end of the text @@ -290,8 +289,8 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, case normal_char: state = seen_cr; if (p > c) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)c, p - c); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) c, p - c); } crlf_added = FALSE; @@ -300,15 +299,15 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, case seen_cr: /* Double \r\r */ if (!crlf_added) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)" ", 1); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) " ", 1); crlf_added = TRUE; - g_ptr_array_add (part->newlines, - (((gpointer) (goffset) (part->utf_stripped_content->len)))); + g_ptr_array_add(part->newlines, + (((gpointer) (goffset) (part->utf_stripped_content->len)))); } - part->nlines ++; - part->empty_lines ++; + part->nlines++; + part->empty_lines++; c = p + 1; break; case seen_lf: @@ -320,7 +319,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, url_open_bracket = FALSE; - p ++; + p++; } else if (*p == '\n') { switch (state) { @@ 
-328,17 +327,17 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, state = seen_lf; if (p > c) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)c, p - c); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) c, p - c); } c = p + 1; - if (IS_TEXT_PART_HTML (part) || !url_open_bracket) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)" ", 1); - g_ptr_array_add (part->newlines, - (((gpointer) (goffset) (part->utf_stripped_content->len)))); + if (IS_TEXT_PART_HTML(part) || !url_open_bracket) { + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) " ", 1); + g_ptr_array_add(part->newlines, + (((gpointer) (goffset) (part->utf_stripped_content->len)))); crlf_added = TRUE; } else { @@ -349,14 +348,14 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, case seen_cr: /* \r\n */ if (!crlf_added) { - if (IS_TEXT_PART_HTML (part) || !url_open_bracket) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *) " ", 1); + if (IS_TEXT_PART_HTML(part) || !url_open_bracket) { + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) " ", 1); crlf_added = TRUE; } - g_ptr_array_add (part->newlines, - (((gpointer) (goffset) (part->utf_stripped_content->len)))); + g_ptr_array_add(part->newlines, + (((gpointer) (goffset) (part->utf_stripped_content->len)))); } c = p + 1; @@ -366,22 +365,22 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, case seen_lf: /* Double \n\n */ if (!crlf_added) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)" ", 1); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) " ", 1); crlf_added = TRUE; - g_ptr_array_add (part->newlines, - (((gpointer) (goffset) (part->utf_stripped_content->len)))); + g_ptr_array_add(part->newlines, + (((gpointer) (goffset) (part->utf_stripped_content->len)))); } part->nlines++; - part->empty_lines ++; + part->empty_lines++; c = p + 1; break; } url_open_bracket = FALSE; - p ++; + 
p++; } else { if ((*p) == '<') { @@ -394,54 +393,54 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, switch (state) { case normal_char: if (*p == ' ') { - part->spaces ++; + part->spaces++; if (p > begin && *(p - 1) == ' ') { - part->double_spaces ++; + part->double_spaces++; } } else { - part->non_spaces ++; + part->non_spaces++; if ((*p) & 0x80) { - part->non_ascii_chars ++; + part->non_ascii_chars++; } else { - if (g_ascii_isupper (*p)) { - part->capital_letters ++; + if (g_ascii_isupper(*p)) { + part->capital_letters++; } - else if (g_ascii_isdigit (*p)) { - part->numeric_characters ++; + else if (g_ascii_isdigit(*p)) { + part->numeric_characters++; } - part->ascii_chars ++; + part->ascii_chars++; } } break; case seen_cr: case seen_lf: - part->nlines ++; + part->nlines++; if (!crlf_added) { - g_ptr_array_add (part->newlines, - (((gpointer) (goffset) (part->utf_stripped_content->len)))); + g_ptr_array_add(part->newlines, + (((gpointer) (goffset) (part->utf_stripped_content->len)))); } /* Skip initial spaces */ if (*p == ' ') { if (!crlf_added) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)" ", 1); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) " ", 1); } while (p < pe && *p == ' ') { - p ++; - c ++; - part->spaces ++; + p++; + c++; + part->spaces++; } if (p < pe && (*p == '\r' || *p == '\n')) { - part->empty_lines ++; + part->empty_lines++; } } @@ -449,7 +448,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, continue; } - p ++; + p++; } } @@ -461,38 +460,38 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, switch (state) { case normal_char: - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)c, p - c); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) c, p - c); while (c < p) { if (*c == ' ') { - part->spaces ++; + part->spaces++; if (c > begin && *(c - 1) == ' ') { - part->double_spaces ++; + part->double_spaces++; } } else { - part->non_spaces ++; + 
part->non_spaces++; if ((*c) & 0x80) { - part->non_ascii_chars ++; + part->non_ascii_chars++; } else { - part->ascii_chars ++; + part->ascii_chars++; } } - c ++; + c++; } break; default: if (!crlf_added) { - g_byte_array_append (part->utf_stripped_content, - (const guint8 *)" ", 1); - g_ptr_array_add (part->newlines, - (((gpointer) (goffset) (part->utf_stripped_content->len)))); + g_byte_array_append(part->utf_stripped_content, + (const guint8 *) " ", 1); + g_ptr_array_add(part->newlines, + (((gpointer) (goffset) (part->utf_stripped_content->len)))); } part->nlines++; @@ -502,14 +501,14 @@ rspamd_strip_newlines_parse (struct rspamd_task *task, } static void -rspamd_u_text_dtor (void *p) +rspamd_u_text_dtor(void *p) { - utext_close ((UText *)p); + utext_close((UText *) p); } static void -rspamd_normalize_text_part (struct rspamd_task *task, - struct rspamd_mime_text_part *part) +rspamd_normalize_text_part(struct rspamd_task *task, + struct rspamd_mime_text_part *part) { const gchar *p, *end; guint i; @@ -517,63 +516,62 @@ rspamd_normalize_text_part (struct rspamd_task *task, struct rspamd_process_exception *ex; UErrorCode uc_err = U_ZERO_ERROR; - part->newlines = g_ptr_array_sized_new (128); + part->newlines = g_ptr_array_sized_new(128); - if (IS_TEXT_PART_EMPTY (part)) { - part->utf_stripped_content = g_byte_array_new (); + if (IS_TEXT_PART_EMPTY(part)) { + part->utf_stripped_content = g_byte_array_new(); } else { - part->utf_stripped_content = g_byte_array_sized_new (part->utf_content.len); + part->utf_stripped_content = g_byte_array_sized_new(part->utf_content.len); - p = (const gchar *)part->utf_content.begin; + p = (const gchar *) part->utf_content.begin; end = p + part->utf_content.len; - rspamd_strip_newlines_parse (task, p, end, part); + rspamd_strip_newlines_parse(task, p, end, part); - for (i = 0; i < part->newlines->len; i ++) { - ex = rspamd_mempool_alloc (task->task_pool, sizeof (*ex)); - off = (goffset)g_ptr_array_index (part->newlines, i); - 
g_ptr_array_index (part->newlines, i) = (gpointer)(goffset) - (part->utf_stripped_content->data + off); + for (i = 0; i < part->newlines->len; i++) { + ex = rspamd_mempool_alloc(task->task_pool, sizeof(*ex)); + off = (goffset) g_ptr_array_index(part->newlines, i); + g_ptr_array_index(part->newlines, i) = (gpointer) (goffset) (part->utf_stripped_content->data + off); ex->pos = off; ex->len = 0; ex->type = RSPAMD_EXCEPTION_NEWLINE; - part->exceptions = g_list_prepend (part->exceptions, ex); + part->exceptions = g_list_prepend(part->exceptions, ex); } } - if (IS_TEXT_PART_UTF (part)) { - utext_openUTF8 (&part->utf_stripped_text, - part->utf_stripped_content->data, - part->utf_stripped_content->len, - &uc_err); + if (IS_TEXT_PART_UTF(part)) { + utext_openUTF8(&part->utf_stripped_text, + part->utf_stripped_content->data, + part->utf_stripped_content->len, + &uc_err); - if (!U_SUCCESS (uc_err)) { - msg_warn_task ("cannot open text from utf content"); + if (!U_SUCCESS(uc_err)) { + msg_warn_task("cannot open text from utf content"); /* Probably, should be an assertion */ } else { - rspamd_mempool_add_destructor (task->task_pool, - rspamd_u_text_dtor, - &part->utf_stripped_text); + rspamd_mempool_add_destructor(task->task_pool, + rspamd_u_text_dtor, + &part->utf_stripped_text); } } - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t) free_byte_array_callback, - part->utf_stripped_content); - rspamd_mempool_notify_alloc (task->task_pool, - part->utf_stripped_content->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard, - part->newlines); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) free_byte_array_callback, + part->utf_stripped_content); + rspamd_mempool_notify_alloc(task->task_pool, + part->utf_stripped_content->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard, + part->newlines); } #define 
MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) static guint -rspamd_words_levenshtein_distance (struct rspamd_task *task, - GArray *w1, GArray *w2) +rspamd_words_levenshtein_distance(struct rspamd_task *task, + GArray *w1, GArray *w2) { guint s1len, s2len, x, y, lastdiag, olddiag; guint *column, ret; @@ -585,9 +583,9 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, s2len = w2->len; if (s1len + s2len > max_words) { - msg_info_task ("cannot direct compare multipart/alternative parts with more than %ud words in total: " - "(%ud words in one part and %ud in another)", - max_words, s1len, s2len); + msg_info_task("cannot direct compare multipart/alternative parts with more than %ud words in total: " + "(%ud words in one part and %ud in another)", + max_words, s1len, s2len); /* Use approximate comparison of number of words */ if (s1len > s2len) { @@ -598,7 +596,7 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, } } - column = g_malloc0 ((s1len + 1) * sizeof (guint)); + column = g_malloc0((s1len + 1) * sizeof(guint)); for (y = 1; y <= s1len; y++) { column[y] = y; @@ -609,35 +607,35 @@ rspamd_words_levenshtein_distance (struct rspamd_task *task, for (y = 1, lastdiag = x - 1; y <= s1len; y++) { olddiag = column[y]; - h1 = g_array_index (w1, guint64, y - 1); - h2 = g_array_index (w2, guint64, x - 1); + h1 = g_array_index(w1, guint64, y - 1); + h2 = g_array_index(w2, guint64, x - 1); eq = (h1 == h2) ? 
1 : 0; /* * Cost of replacement is twice higher than cost of add/delete * to calculate percentage properly */ - column[y] = MIN3 (column[y] + 1, column[y - 1] + 1, - lastdiag + (eq * 2)); + column[y] = MIN3(column[y] + 1, column[y - 1] + 1, + lastdiag + (eq * 2)); lastdiag = olddiag; } } ret = column[s1len]; - g_free (column); + g_free(column); return ret; } static gint -rspamd_multipattern_gtube_cb (struct rspamd_multipattern *mp, - guint strnum, - gint match_start, - gint match_pos, - const gchar *text, - gsize len, - void *context) +rspamd_multipattern_gtube_cb(struct rspamd_multipattern *mp, + guint strnum, + gint match_start, + gint match_pos, + const gchar *text, + gsize len, + void *context) { - struct rspamd_task *task = (struct rspamd_task *)context; + struct rspamd_task *task = (struct rspamd_task *) context; if (strnum > 0) { if (task->cfg->enable_test_patterns) { @@ -651,59 +649,59 @@ rspamd_multipattern_gtube_cb (struct rspamd_multipattern *mp, } static enum rspamd_action_type -rspamd_check_gtube (struct rspamd_task *task, struct rspamd_mime_text_part *part) +rspamd_check_gtube(struct rspamd_task *task, struct rspamd_mime_text_part *part) { static const gsize max_check_size = 8 * 1024; gint ret; enum rspamd_action_type act = METRIC_ACTION_NOACTION; - g_assert (part != NULL); + g_assert(part != NULL); if (gtube_matcher == NULL) { - gtube_matcher = rspamd_multipattern_create (RSPAMD_MULTIPATTERN_DEFAULT); - - rspamd_multipattern_add_pattern (gtube_matcher, - gtube_pattern_reject, - RSPAMD_MULTIPATTERN_DEFAULT); - rspamd_multipattern_add_pattern (gtube_matcher, - gtube_pattern_add_header, - RSPAMD_MULTIPATTERN_DEFAULT); - rspamd_multipattern_add_pattern (gtube_matcher, - gtube_pattern_rewrite_subject, - RSPAMD_MULTIPATTERN_DEFAULT); - rspamd_multipattern_add_pattern (gtube_matcher, - gtube_pattern_no_action, - RSPAMD_MULTIPATTERN_DEFAULT); + gtube_matcher = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT); + + 
rspamd_multipattern_add_pattern(gtube_matcher, + gtube_pattern_reject, + RSPAMD_MULTIPATTERN_DEFAULT); + rspamd_multipattern_add_pattern(gtube_matcher, + gtube_pattern_add_header, + RSPAMD_MULTIPATTERN_DEFAULT); + rspamd_multipattern_add_pattern(gtube_matcher, + gtube_pattern_rewrite_subject, + RSPAMD_MULTIPATTERN_DEFAULT); + rspamd_multipattern_add_pattern(gtube_matcher, + gtube_pattern_no_action, + RSPAMD_MULTIPATTERN_DEFAULT); GError *err = NULL; - rspamd_multipattern_compile (gtube_matcher, &err); + rspamd_multipattern_compile(gtube_matcher, &err); if (err != NULL) { /* It will be expensive, but I don't care, still better than to abort */ - msg_err ("cannot compile gtube matcher: %s", err->message); - g_error_free (err); + msg_err("cannot compile gtube matcher: %s", err->message); + g_error_free(err); } } - if (part->utf_content.len >= sizeof (gtube_pattern_reject) && - part->utf_content.len <= max_check_size) { - if ((ret = rspamd_multipattern_lookup (gtube_matcher, part->utf_content.begin, - part->utf_content.len, - rspamd_multipattern_gtube_cb, task, NULL)) > 0) { + if (part->utf_content.len >= sizeof(gtube_pattern_reject) && + part->utf_content.len <= max_check_size) { + if ((ret = rspamd_multipattern_lookup(gtube_matcher, part->utf_content.begin, + part->utf_content.len, + rspamd_multipattern_gtube_cb, task, NULL)) > 0) { switch (ret) { case 1: act = METRIC_ACTION_REJECT; break; case 2: - g_assert (task->cfg->enable_test_patterns); + g_assert(task->cfg->enable_test_patterns); act = METRIC_ACTION_ADD_HEADER; break; case 3: - g_assert (task->cfg->enable_test_patterns); + g_assert(task->cfg->enable_test_patterns); act = METRIC_ACTION_REWRITE_SUBJECT; break; case 4: - g_assert (task->cfg->enable_test_patterns); + g_assert(task->cfg->enable_test_patterns); act = METRIC_ACTION_NOACTION; break; } @@ -711,10 +709,10 @@ rspamd_check_gtube (struct rspamd_task *task, struct rspamd_mime_text_part *part if (ret != 0) { task->flags |= RSPAMD_TASK_FLAG_SKIP; task->flags 
|= RSPAMD_TASK_FLAG_GTUBE; - msg_info_task ( - "gtube %s pattern has been found in part of length %uz", - rspamd_action_to_str (act), - part->utf_content.len); + msg_info_task( + "gtube %s pattern has been found in part of length %uz", + rspamd_action_to_str(act), + part->utf_content.len); } } } @@ -723,7 +721,7 @@ rspamd_check_gtube (struct rspamd_task *task, struct rspamd_mime_text_part *part } static gint -exceptions_compare_func (gconstpointer a, gconstpointer b) +exceptions_compare_func(gconstpointer a, gconstpointer b) { const struct rspamd_process_exception *ea = a, *eb = b; @@ -731,8 +729,8 @@ exceptions_compare_func (gconstpointer a, gconstpointer b) } static gboolean -rspamd_message_process_plain_text_part (struct rspamd_task *task, - struct rspamd_mime_text_part *text_part) +rspamd_message_process_plain_text_part(struct rspamd_task *task, + struct rspamd_mime_text_part *text_part) { if (text_part->parsed.len == 0) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY; @@ -740,11 +738,11 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task, return TRUE; } - rspamd_mime_text_part_maybe_convert (task, text_part); + rspamd_mime_text_part_maybe_convert(task, text_part); if (text_part->utf_raw_content != NULL) { /* Just have the same content */ - text_part->utf_content.begin = (const gchar *)text_part->utf_raw_content->data; + text_part->utf_content.begin = (const gchar *) text_part->utf_raw_content->data; text_part->utf_content.len = text_part->utf_raw_content->len; } else { @@ -762,9 +760,9 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task, } static gboolean -rspamd_message_process_html_text_part (struct rspamd_task *task, - struct rspamd_mime_text_part *text_part, - uint16_t *cur_url_order) +rspamd_message_process_html_text_part(struct rspamd_task *task, + struct rspamd_mime_text_part *text_part, + uint16_t *cur_url_order) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_HTML; @@ -774,21 +772,21 @@ 
rspamd_message_process_html_text_part (struct rspamd_task *task, return TRUE; } - rspamd_mime_text_part_maybe_convert (task, text_part); + rspamd_mime_text_part_maybe_convert(task, text_part); if (text_part->utf_raw_content == NULL) { return FALSE; } - text_part->html = rspamd_html_process_part_full ( - task, - text_part->utf_raw_content, - &text_part->exceptions, - MESSAGE_FIELD (task, urls), - text_part->mime_part->urls, - task->cfg ? task->cfg->enable_css_parser : true, - cur_url_order); + text_part->html = rspamd_html_process_part_full( + task, + text_part->utf_raw_content, + &text_part->exceptions, + MESSAGE_FIELD(task, urls), + text_part->mime_part->urls, + task->cfg ? task->cfg->enable_css_parser : true, + cur_url_order); rspamd_html_get_parsed_content(text_part->html, &text_part->utf_content); if (text_part->utf_content.len == 0) { @@ -805,13 +803,13 @@ enum rspamd_message_part_is_text_result { }; static enum rspamd_message_part_is_text_result -rspamd_message_part_can_be_parsed_as_text (struct rspamd_task *task, - struct rspamd_mime_part *mime_part) +rspamd_message_part_can_be_parsed_as_text(struct rspamd_task *task, + struct rspamd_mime_part *mime_part) { enum rspamd_message_part_is_text_result res = RSPAMD_MESSAGE_PART_IS_NOT_TEXT; if ((mime_part->ct && (mime_part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) || - (mime_part->detected_type && strcmp (mime_part->detected_type, "text") == 0)) { + (mime_part->detected_type && strcmp(mime_part->detected_type, "text") == 0)) { res = RSPAMD_MESSAGE_PART_IS_TEXT_PLAIN; rspamd_ftok_t html_tok, xhtml_tok; @@ -821,10 +819,10 @@ rspamd_message_part_can_be_parsed_as_text (struct rspamd_task *task, xhtml_tok.begin = "xhtml"; xhtml_tok.len = 5; - if (rspamd_ftok_casecmp (&mime_part->ct->subtype, &html_tok) == 0 || - rspamd_ftok_casecmp (&mime_part->ct->subtype, &xhtml_tok) == 0 || + if (rspamd_ftok_casecmp(&mime_part->ct->subtype, &html_tok) == 0 || + rspamd_ftok_casecmp(&mime_part->ct->subtype, &xhtml_tok) == 0 || 
(mime_part->detected_ext && - strcmp (mime_part->detected_ext, "html") == 0)) { + strcmp(mime_part->detected_ext, "html") == 0)) { res = RSPAMD_MESSAGE_PART_IS_TEXT_HTML; } } @@ -833,7 +831,7 @@ rspamd_message_part_can_be_parsed_as_text (struct rspamd_task *task, if (res != RSPAMD_MESSAGE_PART_IS_NOT_TEXT && (mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT)) { if (!task->cfg->check_text_attachements) { - debug_task ("skip attachments for checking as text parts"); + debug_task("skip attachments for checking as text parts"); return RSPAMD_MESSAGE_PART_IS_NOT_TEXT; } } @@ -842,10 +840,10 @@ rspamd_message_part_can_be_parsed_as_text (struct rspamd_task *task, } static gboolean -rspamd_message_process_text_part_maybe (struct rspamd_task *task, - struct rspamd_mime_part *mime_part, - enum rspamd_message_part_is_text_result is_text, - uint16_t *cur_url_order) +rspamd_message_process_text_part_maybe(struct rspamd_task *task, + struct rspamd_mime_part *mime_part, + enum rspamd_message_part_is_text_result is_text, + uint16_t *cur_url_order) { struct rspamd_mime_text_part *text_part; guint flags = 0; @@ -856,105 +854,105 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, flags |= RSPAMD_MIME_TEXT_PART_ATTACHMENT; } - text_part = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_text_part)); + text_part = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_text_part)); text_part->mime_part = mime_part; text_part->raw.begin = mime_part->raw_data.begin; text_part->raw.len = mime_part->raw_data.len; text_part->parsed.begin = mime_part->parsed_data.begin; text_part->parsed.len = mime_part->parsed_data.len; - text_part->utf_stripped_text = (UText)UTEXT_INITIALIZER; + text_part->utf_stripped_text = (UText) UTEXT_INITIALIZER; text_part->flags |= flags; if (is_text == RSPAMD_MESSAGE_PART_IS_TEXT_HTML) { - if (!rspamd_message_process_html_text_part (task, text_part, cur_url_order)) { + if 
(!rspamd_message_process_html_text_part(task, text_part, cur_url_order)) { return FALSE; } } else { - if (!rspamd_message_process_plain_text_part (task, text_part)) { + if (!rspamd_message_process_plain_text_part(task, text_part)) { return FALSE; } } - g_ptr_array_add (MESSAGE_FIELD (task, text_parts), text_part); + g_ptr_array_add(MESSAGE_FIELD(task, text_parts), text_part); mime_part->part_type = RSPAMD_MIME_PART_TEXT; mime_part->specific.txt = text_part; - act = rspamd_check_gtube (task, text_part); + act = rspamd_check_gtube(task, text_part); if (act != METRIC_ACTION_NOACTION) { struct rspamd_action *action; gdouble score = NAN; - action = rspamd_config_get_action_by_type (task->cfg, act); + action = rspamd_config_get_action_by_type(task->cfg, act); if (action) { score = action->threshold; - rspamd_add_passthrough_result (task, action, - RSPAMD_PASSTHROUGH_CRITICAL, - score, "Gtube pattern", - "GTUBE", 0, NULL); + rspamd_add_passthrough_result(task, action, + RSPAMD_PASSTHROUGH_CRITICAL, + score, "Gtube pattern", + "GTUBE", 0, NULL); } - rspamd_task_insert_result (task, GTUBE_SYMBOL, 0, NULL); + rspamd_task_insert_result(task, GTUBE_SYMBOL, 0, NULL); return TRUE; } /* Post process part */ - rspamd_normalize_text_part (task, text_part); + rspamd_normalize_text_part(task, text_part); - if (!IS_TEXT_PART_HTML (text_part)) { + if (!IS_TEXT_PART_HTML(text_part)) { if (mime_part->parent_part) { struct rspamd_mime_part *parent = mime_part->parent_part; - if (IS_PART_MULTIPART (parent) && parent->specific.mp->children->len == 2) { + if (IS_PART_MULTIPART(parent) && parent->specific.mp->children->len == 2) { /* * Use strict extraction mode: we will extract missing urls from * an html part if needed */ - rspamd_url_text_extract (task->task_pool, task, text_part, cur_url_order, - RSPAMD_URL_FIND_STRICT); + rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order, + RSPAMD_URL_FIND_STRICT); } else { /* * Fall back to full text extraction using TLD patterns 
*/ - rspamd_url_text_extract (task->task_pool, task, text_part, cur_url_order, - RSPAMD_URL_FIND_ALL); + rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order, + RSPAMD_URL_FIND_ALL); } } else { /* * Fall back to full text extraction using TLD patterns */ - rspamd_url_text_extract (task->task_pool, task, text_part, cur_url_order, - RSPAMD_URL_FIND_ALL); + rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order, + RSPAMD_URL_FIND_ALL); } } else { - rspamd_url_text_extract (task->task_pool, task, text_part, cur_url_order, - RSPAMD_URL_FIND_STRICT); + rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order, + RSPAMD_URL_FIND_STRICT); } if (text_part->exceptions) { - text_part->exceptions = g_list_sort (text_part->exceptions, - exceptions_compare_func); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)g_list_free, - text_part->exceptions); + text_part->exceptions = g_list_sort(text_part->exceptions, + exceptions_compare_func); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) g_list_free, + text_part->exceptions); } - rspamd_mime_part_create_words (task, text_part); + rspamd_mime_part_create_words(task, text_part); return TRUE; } /* Creates message from various data using libmagic to detect type */ static void -rspamd_message_from_data (struct rspamd_task *task, const guchar *start, - gsize len) +rspamd_message_from_data(struct rspamd_task *task, const guchar *start, + gsize len) { struct rspamd_content_type *ct = NULL; struct rspamd_mime_part *part; @@ -963,87 +961,87 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, rspamd_ftok_t srch, *tok; gchar cdbuf[1024]; - g_assert (start != NULL); + g_assert(start != NULL); - part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part)); + part = rspamd_mempool_alloc0(task->task_pool, sizeof(*part)); part->raw_data.begin = start; part->raw_data.len = len; part->parsed_data.begin = start; 
part->parsed_data.len = len; - part->part_number = MESSAGE_FIELD (task, parts)->len; - part->urls = g_ptr_array_new (); - part->raw_headers = rspamd_message_headers_new (); + part->part_number = MESSAGE_FIELD(task, parts)->len; + part->urls = g_ptr_array_new(); + part->raw_headers = rspamd_message_headers_new(); part->headers_order = NULL; - tok = rspamd_task_get_request_header (task, "Content-Type"); + tok = rspamd_task_get_request_header(task, "Content-Type"); if (tok) { /* We have Content-Type defined */ - ct = rspamd_content_type_parse (tok->begin, tok->len, - task->task_pool); + ct = rspamd_content_type_parse(tok->begin, tok->len, + task->task_pool); part->ct = ct; } else if (task->cfg && task->cfg->libs_ctx) { lua_State *L = task->cfg->lua_state; - if (rspamd_lua_require_function (L, - "lua_magic", "detect_mime_part")) { + if (rspamd_lua_require_function(L, + "lua_magic", "detect_mime_part")) { struct rspamd_mime_part **pmime; struct rspamd_task **ptask; - pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *)); - rspamd_lua_setclass (L, "rspamd{mimepart}", -1); + pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); + rspamd_lua_setclass(L, "rspamd{mimepart}", -1); *pmime = part; - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); - rspamd_lua_setclass (L, "rspamd{task}", -1); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); + rspamd_lua_setclass(L, "rspamd{task}", -1); *ptask = task; - if (lua_pcall (L, 2, 2, 0) != 0) { - msg_err_task ("cannot detect type: %s", lua_tostring (L, -1)); + if (lua_pcall(L, 2, 2, 0) != 0) { + msg_err_task("cannot detect type: %s", lua_tostring(L, -1)); } else { - if (lua_istable (L, -1)) { - lua_pushstring (L, "ct"); - lua_gettable (L, -2); + if (lua_istable(L, -1)) { + lua_pushstring(L, "ct"); + lua_gettable(L, -2); - if (lua_isstring (L, -1)) { - mb = rspamd_mempool_strdup (task->task_pool, - lua_tostring (L, -1)); + if (lua_isstring(L, -1)) { + mb = rspamd_mempool_strdup(task->task_pool, 
+ lua_tostring(L, -1)); } } } - lua_settop (L, 0); + lua_settop(L, 0); } else { - msg_err_task ("cannot require lua_magic.detect_mime_part"); + msg_err_task("cannot require lua_magic.detect_mime_part"); } if (mb) { srch.begin = mb; - srch.len = strlen (mb); - ct = rspamd_content_type_parse (srch.begin, srch.len, - task->task_pool); + srch.len = strlen(mb); + ct = rspamd_content_type_parse(srch.begin, srch.len, + task->task_pool); if (!part->ct) { - msg_info_task ("construct fake mime of type: %s", mb); + msg_info_task("construct fake mime of type: %s", mb); part->ct = ct; } else { /* Check sanity */ if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) { - RSPAMD_FTOK_FROM_STR (&srch, "application"); + RSPAMD_FTOK_FROM_STR(&srch, "application"); - if (rspamd_ftok_cmp (&ct->type, &srch) == 0) { - msg_info_task ("construct fake mime of type: %s", mb); + if (rspamd_ftok_cmp(&ct->type, &srch) == 0) { + msg_info_task("construct fake mime of type: %s", mb); part->ct = ct; } } else { - msg_info_task ("construct fake mime of type: %T/%T, detected %s", - &part->ct->type, &part->ct->subtype, mb); + msg_info_task("construct fake mime of type: %T/%T, detected %s", + &part->ct->type, &part->ct->subtype, mb); } } @@ -1052,100 +1050,102 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start, } - tok = rspamd_task_get_request_header (task, "Filename"); + tok = rspamd_task_get_request_header(task, "Filename"); if (tok) { - rspamd_snprintf (cdbuf, sizeof (cdbuf), "inline; filename=\"%T\"", tok); + rspamd_snprintf(cdbuf, sizeof(cdbuf), "inline; filename=\"%T\"", tok); } else { - rspamd_snprintf (cdbuf, sizeof (cdbuf), "inline"); + rspamd_snprintf(cdbuf, sizeof(cdbuf), "inline"); } - part->cd = rspamd_content_disposition_parse (cdbuf, strlen (cdbuf), - task->task_pool); + part->cd = rspamd_content_disposition_parse(cdbuf, strlen(cdbuf), + task->task_pool); - g_ptr_array_add (MESSAGE_FIELD (task, parts), part); - rspamd_mime_parser_calc_digest (part); + 
g_ptr_array_add(MESSAGE_FIELD(task, parts), part); + rspamd_mime_parser_calc_digest(part); /* Generate message ID */ - mid = rspamd_mime_message_id_generate ("localhost.localdomain"); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t) g_free, mid); - MESSAGE_FIELD (task, message_id) = mid; + mid = rspamd_mime_message_id_generate("localhost.localdomain"); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) g_free, mid); + MESSAGE_FIELD(task, message_id) = mid; task->queue_id = mid; } static void -rspamd_message_dtor (struct rspamd_message *msg) +rspamd_message_dtor(struct rspamd_message *msg) { guint i; struct rspamd_mime_part *p; struct rspamd_mime_text_part *tp; - PTR_ARRAY_FOREACH (msg->parts, i, p) { + PTR_ARRAY_FOREACH(msg->parts, i, p) + { if (p->raw_headers) { - rspamd_message_headers_unref (p->raw_headers); + rspamd_message_headers_unref(p->raw_headers); } - if (IS_PART_MULTIPART (p)) { + if (IS_PART_MULTIPART(p)) { if (p->specific.mp->children) { - g_ptr_array_free (p->specific.mp->children, TRUE); + g_ptr_array_free(p->specific.mp->children, TRUE); } } if (p->part_type == RSPAMD_MIME_PART_CUSTOM_LUA && - p->specific.lua_specific.cbref != -1) { - luaL_unref (msg->task->cfg->lua_state, - LUA_REGISTRYINDEX, - p->specific.lua_specific.cbref); + p->specific.lua_specific.cbref != -1) { + luaL_unref(msg->task->cfg->lua_state, + LUA_REGISTRYINDEX, + p->specific.lua_specific.cbref); } if (p->urls) { - g_ptr_array_unref (p->urls); + g_ptr_array_unref(p->urls); } } - PTR_ARRAY_FOREACH (msg->text_parts, i, tp) { + PTR_ARRAY_FOREACH(msg->text_parts, i, tp) + { if (tp->utf_words) { - g_array_free (tp->utf_words, TRUE); + g_array_free(tp->utf_words, TRUE); } if (tp->normalized_hashes) { - g_array_free (tp->normalized_hashes, TRUE); + g_array_free(tp->normalized_hashes, TRUE); } if (tp->languages) { - g_ptr_array_unref (tp->languages); + g_ptr_array_unref(tp->languages); } } - rspamd_message_headers_unref 
(msg->raw_headers); + rspamd_message_headers_unref(msg->raw_headers); - g_ptr_array_unref (msg->text_parts); - g_ptr_array_unref (msg->parts); + g_ptr_array_unref(msg->text_parts); + g_ptr_array_unref(msg->parts); - kh_destroy (rspamd_url_hash, msg->urls); + kh_destroy(rspamd_url_hash, msg->urls); } -struct rspamd_message* -rspamd_message_new (struct rspamd_task *task) +struct rspamd_message * +rspamd_message_new(struct rspamd_task *task) { struct rspamd_message *msg; - msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg)); + msg = rspamd_mempool_alloc0(task->task_pool, sizeof(*msg)); - msg->raw_headers = rspamd_message_headers_new (); - msg->urls = kh_init (rspamd_url_hash); - msg->parts = g_ptr_array_sized_new (4); - msg->text_parts = g_ptr_array_sized_new (2); + msg->raw_headers = rspamd_message_headers_new(); + msg->urls = kh_init(rspamd_url_hash); + msg->parts = g_ptr_array_sized_new(4); + msg->text_parts = g_ptr_array_sized_new(2); msg->task = task; - REF_INIT_RETAIN (msg, rspamd_message_dtor); + REF_INIT_RETAIN(msg, rspamd_message_dtor); return msg; } gboolean -rspamd_message_parse (struct rspamd_task *task) +rspamd_message_parse(struct rspamd_task *task) { const gchar *p; gsize len; @@ -1153,7 +1153,7 @@ rspamd_message_parse (struct rspamd_task *task) GError *err = NULL; guint64 n[2], seed; - if (RSPAMD_TASK_IS_EMPTY (task)) { + if (RSPAMD_TASK_IS_EMPTY(task)) { /* Don't do anything with empty task */ task->flags |= RSPAMD_TASK_FLAG_SKIP_PROCESS; return TRUE; @@ -1163,9 +1163,9 @@ rspamd_message_parse (struct rspamd_task *task) len = task->msg.len; /* Skip any space characters to avoid some bad messages to be unparsed */ - while (len > 0 && g_ascii_isspace (*p)) { - p ++; - len --; + while (len > 0 && g_ascii_isspace(*p)) { + p++; + len--; } /* @@ -1174,20 +1174,20 @@ rspamd_message_parse (struct rspamd_task *task) * * So we check if a task has this line to avoid possible issues */ - if (len > sizeof ("From ") - 1) { - if (memcmp (p, "From ", sizeof 
("From ") - 1) == 0) { + if (len > sizeof("From ") - 1) { + if (memcmp(p, "From ", sizeof("From ") - 1) == 0) { /* Skip to CRLF */ - msg_info_task ("mailbox input detected, enable workaround"); - p += sizeof ("From ") - 1; - len -= sizeof ("From ") - 1; + msg_info_task("mailbox input detected, enable workaround"); + p += sizeof("From ") - 1; + len -= sizeof("From ") - 1; while (len > 0 && *p != '\n') { - p ++; - len --; + p++; + len--; } - while (len > 0 && g_ascii_isspace (*p)) { - p ++; - len --; + while (len > 0 && g_ascii_isspace(*p)) { + p++; + len--; } } } @@ -1197,24 +1197,24 @@ rspamd_message_parse (struct rspamd_task *task) /* Cleanup old message */ if (task->message) { - rspamd_message_unref (task->message); + rspamd_message_unref(task->message); } - task->message = rspamd_message_new (task); + task->message = rspamd_message_new(task); if (task->flags & RSPAMD_TASK_FLAG_MIME) { enum rspamd_mime_parse_error ret; - debug_task ("construct mime parser from string length %d", - (gint) task->msg.len); - ret = rspamd_mime_parse_task (task, &err); + debug_task("construct mime parser from string length %d", + (gint) task->msg.len); + ret = rspamd_mime_parse_task(task, &err); switch (ret) { case RSPAMD_MIME_PARSE_FATAL: - msg_err_task ("cannot construct mime from stream: %e", err); + msg_err_task("cannot construct mime from stream: %e", err); if (task->cfg && (!task->cfg->allow_raw_input)) { - msg_err_task ("cannot construct mime from stream"); + msg_err_task("cannot construct mime from stream"); if (err) { task->err = err; } @@ -1223,11 +1223,11 @@ rspamd_message_parse (struct rspamd_task *task) } else { task->flags &= ~RSPAMD_TASK_FLAG_MIME; - rspamd_message_from_data (task, p, len); + rspamd_message_from_data(task, p, len); } break; case RSPAMD_MIME_PARSE_NESTING: - msg_warn_task ("cannot construct full mime from stream: %e", err); + msg_warn_task("cannot construct full mime from stream: %e", err); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; break; case 
RSPAMD_MIME_PARSE_OK: @@ -1236,19 +1236,19 @@ rspamd_message_parse (struct rspamd_task *task) } if (err) { - g_error_free (err); + g_error_free(err); } } else { - rspamd_message_from_data (task, p, len); + rspamd_message_from_data(task, p, len); } - if (MESSAGE_FIELD (task, message_id) == NULL) { - MESSAGE_FIELD (task, message_id) = "undef"; + if (MESSAGE_FIELD(task, message_id) == NULL) { + MESSAGE_FIELD(task, message_id) = "undef"; } - debug_task ("found %ud parts in message", MESSAGE_FIELD (task, parts)->len); + debug_task("found %ud parts in message", MESSAGE_FIELD(task, parts)->len); if (task->queue_id == NULL) { task->queue_id = "undef"; } @@ -1259,48 +1259,105 @@ rspamd_message_parse (struct rspamd_task *task) /* Blake2b applied to string 'rspamd' */ static const guchar RSPAMD_ALIGNED(32) hash_key[] = { - 0xef,0x43,0xae,0x80,0xcc,0x8d,0xc3,0x4c, - 0x6f,0x1b,0xd6,0x18,0x1b,0xae,0x87,0x74, - 0x0c,0xca,0xf7,0x8e,0x5f,0x2e,0x54,0x32, - 0xf6,0x79,0xb9,0x27,0x26,0x96,0x20,0x92, - 0x70,0x07,0x85,0xeb,0x83,0xf7,0x89,0xe0, - 0xd7,0x32,0x2a,0xd2,0x1a,0x64,0x41,0xef, - 0x49,0xff,0xc3,0x8c,0x54,0xf9,0x67,0x74, - 0x30,0x1e,0x70,0x2e,0xb7,0x12,0x09,0xfe, + 0xef, + 0x43, + 0xae, + 0x80, + 0xcc, + 0x8d, + 0xc3, + 0x4c, + 0x6f, + 0x1b, + 0xd6, + 0x18, + 0x1b, + 0xae, + 0x87, + 0x74, + 0x0c, + 0xca, + 0xf7, + 0x8e, + 0x5f, + 0x2e, + 0x54, + 0x32, + 0xf6, + 0x79, + 0xb9, + 0x27, + 0x26, + 0x96, + 0x20, + 0x92, + 0x70, + 0x07, + 0x85, + 0xeb, + 0x83, + 0xf7, + 0x89, + 0xe0, + 0xd7, + 0x32, + 0x2a, + 0xd2, + 0x1a, + 0x64, + 0x41, + 0xef, + 0x49, + 0xff, + 0xc3, + 0x8c, + 0x54, + 0xf9, + 0x67, + 0x74, + 0x30, + 0x1e, + 0x70, + 0x2e, + 0xb7, + 0x12, + 0x09, + 0xfe, }; - memcpy (&seed, hash_key, sizeof (seed)); + memcpy(&seed, hash_key, sizeof(seed)); - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { - n[0] = t1ha2_atonce128 (&n[1], - part->digest, sizeof (part->digest), - seed); + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { + n[0] = t1ha2_atonce128(&n[1], 
+ part->digest, sizeof(part->digest), + seed); seed = n[0] ^ n[1]; } - memcpy (MESSAGE_FIELD (task, digest), n, sizeof (n)); + memcpy(MESSAGE_FIELD(task, digest), n, sizeof(n)); - if (MESSAGE_FIELD (task, subject)) { - p = MESSAGE_FIELD (task, subject); - len = strlen (p); - n[0] = t1ha2_atonce128 (&n[1], - p, len, - seed); - memcpy (MESSAGE_FIELD (task, digest), n, sizeof (n)); + if (MESSAGE_FIELD(task, subject)) { + p = MESSAGE_FIELD(task, subject); + len = strlen(p); + n[0] = t1ha2_atonce128(&n[1], + p, len, + seed); + memcpy(MESSAGE_FIELD(task, digest), n, sizeof(n)); } if (task->queue_id) { - msg_info_task ("loaded message; id: <%s>; queue-id: <%s>; size: %z; " - "checksum: <%*xs>", - MESSAGE_FIELD (task, message_id), task->queue_id, task->msg.len, - (gint)sizeof (MESSAGE_FIELD (task, digest)), MESSAGE_FIELD (task, digest)); + msg_info_task("loaded message; id: <%s>; queue-id: <%s>; size: %z; " + "checksum: <%*xs>", + MESSAGE_FIELD(task, message_id), task->queue_id, task->msg.len, + (gint) sizeof(MESSAGE_FIELD(task, digest)), MESSAGE_FIELD(task, digest)); } else { - msg_info_task ("loaded message; id: <%s>; size: %z; " - "checksum: <%*xs>", - MESSAGE_FIELD (task, message_id), task->msg.len, - (gint)sizeof (MESSAGE_FIELD (task, digest)), MESSAGE_FIELD (task, digest)); + msg_info_task("loaded message; id: <%s>; size: %z; " + "checksum: <%*xs>", + MESSAGE_FIELD(task, message_id), task->msg.len, + (gint) sizeof(MESSAGE_FIELD(task, digest)), MESSAGE_FIELD(task, digest)); } return TRUE; @@ -1320,11 +1377,11 @@ struct rspamd_mime_part_text_position { static int rspamd_mime_text_part_position_compare_func(const void *v1, const void *v2) { - const struct rspamd_mime_part_text_position *p1 = (const struct rspamd_mime_part_text_position *)v1; - const struct rspamd_mime_part_text_position *p2 = (const struct rspamd_mime_part_text_position *)v2; + const struct rspamd_mime_part_text_position *p1 = (const struct rspamd_mime_part_text_position *) v1; + const struct 
rspamd_mime_part_text_position *p2 = (const struct rspamd_mime_part_text_position *) v2; if (p1->res == p2->res) { - return (int)p2->pos - (int)p1->pos; + return (int) p2->pos - (int) p1->pos; } else { if (p1->res == RSPAMD_MESSAGE_PART_IS_TEXT_HTML) { @@ -1336,8 +1393,7 @@ rspamd_mime_text_part_position_compare_func(const void *v1, const void *v2) } } -void -rspamd_message_process (struct rspamd_task *task) +void rspamd_message_process(struct rspamd_task *task) { guint i; struct rspamd_mime_text_part *p1, *p2; @@ -1351,103 +1407,104 @@ rspamd_message_process (struct rspamd_task *task) L = task->cfg->lua_state; } - rspamd_archives_process (task); + rspamd_archives_process(task); if (L) { - old_top = lua_gettop (L); + old_top = lua_gettop(L); } - if (L && rspamd_lua_require_function (L, - "lua_magic", "detect_mime_part")) { - magic_func_pos = lua_gettop (L); + if (L && rspamd_lua_require_function(L, + "lua_magic", "detect_mime_part")) { + magic_func_pos = lua_gettop(L); } else { - msg_err_task ("cannot require lua_magic.detect_mime_part"); + msg_err_task("cannot require lua_magic.detect_mime_part"); } - if (L && rspamd_lua_require_function (L, - "lua_content", "maybe_process_mime_part")) { - content_func_pos = lua_gettop (L); + if (L && rspamd_lua_require_function(L, + "lua_content", "maybe_process_mime_part")) { + content_func_pos = lua_gettop(L); } else { - msg_err_task ("cannot require lua_content.maybe_process_mime_part"); + msg_err_task("cannot require lua_content.maybe_process_mime_part"); } if (L) { - funcs_top = lua_gettop (L); + funcs_top = lua_gettop(L); } - GArray *detected_text_parts = g_array_sized_new (FALSE, FALSE, sizeof(struct rspamd_mime_part_text_position), 2); + GArray *detected_text_parts = g_array_sized_new(FALSE, FALSE, sizeof(struct rspamd_mime_part_text_position), 2); - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { if (magic_func_pos != -1 && part->parsed_data.len > 0) 
{ struct rspamd_mime_part **pmime; struct rspamd_task **ptask; - lua_pushcfunction (L, &rspamd_lua_traceback); - gint err_idx = lua_gettop (L); - lua_pushvalue (L, magic_func_pos); - pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *)); - rspamd_lua_setclass (L, "rspamd{mimepart}", -1); + lua_pushcfunction(L, &rspamd_lua_traceback); + gint err_idx = lua_gettop(L); + lua_pushvalue(L, magic_func_pos); + pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); + rspamd_lua_setclass(L, "rspamd{mimepart}", -1); *pmime = part; - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); - rspamd_lua_setclass (L, "rspamd{task}", -1); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); + rspamd_lua_setclass(L, "rspamd{task}", -1); *ptask = task; - if (lua_pcall (L, 2, 2, err_idx) != 0) { - msg_err_task ("cannot detect type: %s", lua_tostring (L, -1)); + if (lua_pcall(L, 2, 2, err_idx) != 0) { + msg_err_task("cannot detect type: %s", lua_tostring(L, -1)); } else { - if (lua_istable (L, -1)) { + if (lua_istable(L, -1)) { const gchar *mb; /* First returned value */ - part->detected_ext = rspamd_mempool_strdup (task->task_pool, - lua_tostring (L, -2)); + part->detected_ext = rspamd_mempool_strdup(task->task_pool, + lua_tostring(L, -2)); - lua_pushstring (L, "ct"); - lua_gettable (L, -2); + lua_pushstring(L, "ct"); + lua_gettable(L, -2); - if (lua_isstring (L, -1)) { - mb = lua_tostring (L, -1); + if (lua_isstring(L, -1)) { + mb = lua_tostring(L, -1); if (mb) { rspamd_ftok_t srch; srch.begin = mb; - srch.len = strlen (mb); - part->detected_ct = rspamd_content_type_parse (srch.begin, - srch.len, - task->task_pool); + srch.len = strlen(mb); + part->detected_ct = rspamd_content_type_parse(srch.begin, + srch.len, + task->task_pool); } } - lua_pop (L, 1); + lua_pop(L, 1); - lua_pushstring (L, "type"); - lua_gettable (L, -2); + lua_pushstring(L, "type"); + lua_gettable(L, -2); - if (lua_isstring (L, -1)) { - part->detected_type = rspamd_mempool_strdup 
(task->task_pool, - lua_tostring (L, -1)); + if (lua_isstring(L, -1)) { + part->detected_type = rspamd_mempool_strdup(task->task_pool, + lua_tostring(L, -1)); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_pushstring (L, "no_text"); - lua_gettable (L, -2); + lua_pushstring(L, "no_text"); + lua_gettable(L, -2); - if (lua_isboolean (L, -1)) { - if (!!lua_toboolean (L, -1)) { + if (lua_isboolean(L, -1)) { + if (!!lua_toboolean(L, -1)) { part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION; } } - lua_pop (L, 1); + lua_pop(L, 1); } } - lua_settop (L, funcs_top); + lua_settop(L, funcs_top); } /* Now detect content */ @@ -1456,25 +1513,25 @@ rspamd_message_process (struct rspamd_task *task) struct rspamd_mime_part **pmime; struct rspamd_task **ptask; - lua_pushcfunction (L, &rspamd_lua_traceback); - gint err_idx = lua_gettop (L); - lua_pushvalue (L, content_func_pos); - pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *)); - rspamd_lua_setclass (L, "rspamd{mimepart}", -1); + lua_pushcfunction(L, &rspamd_lua_traceback); + gint err_idx = lua_gettop(L); + lua_pushvalue(L, content_func_pos); + pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); + rspamd_lua_setclass(L, "rspamd{mimepart}", -1); *pmime = part; - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); - rspamd_lua_setclass (L, "rspamd{task}", -1); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); + rspamd_lua_setclass(L, "rspamd{task}", -1); *ptask = task; - if (lua_pcall (L, 2, 0, err_idx) != 0) { - msg_err_task ("cannot detect content: %s", lua_tostring (L, -1)); + if (lua_pcall(L, 2, 0, err_idx) != 0) { + msg_err_task("cannot detect content: %s", lua_tostring(L, -1)); } - lua_settop (L, funcs_top); + lua_settop(L, funcs_top); } /* Try to detect image before checking for text */ - rspamd_images_process_mime_part_maybe (task, part); + rspamd_images_process_mime_part_maybe(task, part); if (part->part_type == RSPAMD_MIME_PART_UNDEFINED && !(part->flags & 
RSPAMD_MIME_PART_NO_TEXT_EXTRACTION)) { @@ -1483,9 +1540,8 @@ rspamd_message_process (struct rspamd_task *task) if (res != RSPAMD_MESSAGE_PART_IS_NOT_TEXT) { struct rspamd_mime_part_text_position p = { .pos = i, - .res = res - }; - g_array_append_val (detected_text_parts, p); + .res = res}; + g_array_append_val(detected_text_parts, p); } } } @@ -1493,26 +1549,26 @@ rspamd_message_process (struct rspamd_task *task) uint16_t cur_url_order = 0; g_array_sort(detected_text_parts, rspamd_mime_text_part_position_compare_func); /* One more iteration to process text parts in a more specific order */ - for (i = 0; i < detected_text_parts->len; i ++) { - part = g_ptr_array_index (MESSAGE_FIELD (task, parts), - g_array_index(detected_text_parts, struct rspamd_mime_part_text_position, i).pos); + for (i = 0; i < detected_text_parts->len; i++) { + part = g_ptr_array_index(MESSAGE_FIELD(task, parts), + g_array_index(detected_text_parts, struct rspamd_mime_part_text_position, i).pos); rspamd_message_process_text_part_maybe(task, part, - g_array_index(detected_text_parts, struct rspamd_mime_part_text_position, i).res, &cur_url_order); + g_array_index(detected_text_parts, struct rspamd_mime_part_text_position, i).res, &cur_url_order); } - g_array_free (detected_text_parts, TRUE); + g_array_free(detected_text_parts, TRUE); if (old_top != -1) { - lua_settop (L, old_top); + lua_settop(L, old_top); } /* Parse urls inside Subject header */ - if (MESSAGE_FIELD (task, subject)) { - rspamd_url_find_multiple (task->task_pool, MESSAGE_FIELD (task, subject), - strlen (MESSAGE_FIELD (task, subject)), - RSPAMD_URL_FIND_STRICT, NULL, - rspamd_url_task_subject_callback, - task); + if (MESSAGE_FIELD(task, subject)) { + rspamd_url_find_multiple(task->task_pool, MESSAGE_FIELD(task, subject), + strlen(MESSAGE_FIELD(task, subject)), + RSPAMD_URL_FIND_STRICT, NULL, + rspamd_url_task_subject_callback, + task); } /* Calculate average words length and number of short words */ @@ -1520,12 +1576,13 @@ 
rspamd_message_process (struct rspamd_task *task) gdouble *var; guint total_words = 0; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part) + { if (!text_part->language) { - rspamd_mime_part_detect_language (task, text_part); + rspamd_mime_part_detect_language(task, text_part); } - rspamd_mime_part_extract_words (task, text_part); + rspamd_mime_part_extract_words(task, text_part); if (text_part->utf_words) { total_words += text_part->nwords; @@ -1534,8 +1591,8 @@ rspamd_message_process (struct rspamd_task *task) /* Calculate distance for 2-parts messages */ if (i == 2) { - p1 = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0); - p2 = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 1); + p1 = g_ptr_array_index(MESSAGE_FIELD(task, text_parts), 0); + p2 = g_ptr_array_index(MESSAGE_FIELD(task, text_parts), 1); /* First of all check parent object */ if (p1->mime_part->parent_part) { @@ -1544,8 +1601,8 @@ rspamd_message_process (struct rspamd_task *task) srch.begin = "alternative"; srch.len = 11; - if (rspamd_ftok_cmp (&p1->mime_part->parent_part->ct->subtype, &srch) == 0) { - if (!IS_TEXT_PART_EMPTY (p1) && !IS_TEXT_PART_EMPTY (p2) && + if (rspamd_ftok_cmp(&p1->mime_part->parent_part->ct->subtype, &srch) == 0) { + if (!IS_TEXT_PART_EMPTY(p1) && !IS_TEXT_PART_EMPTY(p2) && p1->normalized_hashes && p2->normalized_hashes) { /* * We also detect language on one part and propagate it to @@ -1554,10 +1611,10 @@ rspamd_message_process (struct rspamd_task *task) struct rspamd_mime_text_part *sel; /* Prefer HTML as text part is not displayed normally */ - if (IS_TEXT_PART_HTML (p1)) { + if (IS_TEXT_PART_HTML(p1)) { sel = p1; } - else if (IS_TEXT_PART_HTML (p2)) { + else if (IS_TEXT_PART_HTML(p2)) { sel = p2; } else { @@ -1573,104 +1630,104 @@ rspamd_message_process (struct rspamd_task *task) /* Propagate language */ if (sel == p1) { if (p2->languages) { - g_ptr_array_unref 
(p2->languages); + g_ptr_array_unref(p2->languages); } p2->language = sel->language; - p2->languages = g_ptr_array_ref (sel->languages); + p2->languages = g_ptr_array_ref(sel->languages); } else { if (p1->languages) { - g_ptr_array_unref (p1->languages); + g_ptr_array_unref(p1->languages); } p1->language = sel->language; - p1->languages = g_ptr_array_ref (sel->languages); + p1->languages = g_ptr_array_ref(sel->languages); } } tw = p1->normalized_hashes->len + p2->normalized_hashes->len; if (tw > 0) { - dw = rspamd_words_levenshtein_distance (task, - p1->normalized_hashes, - p2->normalized_hashes); - diff = dw / (gdouble)tw; - - msg_debug_task ( - "different words: %d, total words: %d, " - "got diff between parts of %.2f", - dw, tw, - diff); - - pdiff = rspamd_mempool_alloc (task->task_pool, - sizeof (gdouble)); + dw = rspamd_words_levenshtein_distance(task, + p1->normalized_hashes, + p2->normalized_hashes); + diff = dw / (gdouble) tw; + + msg_debug_task( + "different words: %d, total words: %d, " + "got diff between parts of %.2f", + dw, tw, + diff); + + pdiff = rspamd_mempool_alloc(task->task_pool, + sizeof(gdouble)); *pdiff = diff; - rspamd_mempool_set_variable (task->task_pool, - "parts_distance", - pdiff, - NULL); - ptw = rspamd_mempool_alloc (task->task_pool, - sizeof (gint)); + rspamd_mempool_set_variable(task->task_pool, + "parts_distance", + pdiff, + NULL); + ptw = rspamd_mempool_alloc(task->task_pool, + sizeof(gint)); *ptw = tw; - rspamd_mempool_set_variable (task->task_pool, - "total_words", - ptw, - NULL); + rspamd_mempool_set_variable(task->task_pool, + "total_words", + ptw, + NULL); } } } } else { - debug_task ( - "message contains two parts but they are in different multi-parts"); + debug_task( + "message contains two parts but they are in different multi-parts"); } } if (total_words > 0) { - var = rspamd_mempool_get_variable (task->task_pool, - RSPAMD_MEMPOOL_AVG_WORDS_LEN); + var = rspamd_mempool_get_variable(task->task_pool, + 
RSPAMD_MEMPOOL_AVG_WORDS_LEN); if (var) { - *var /= (double)total_words; + *var /= (double) total_words; } - var = rspamd_mempool_get_variable (task->task_pool, - RSPAMD_MEMPOOL_SHORT_WORDS_CNT); + var = rspamd_mempool_get_variable(task->task_pool, + RSPAMD_MEMPOOL_SHORT_WORDS_CNT); if (var) { - *var /= (double)total_words; + *var /= (double) total_words; } } - rspamd_images_link (task); - rspamd_tokenize_meta_words (task); + rspamd_images_link(task); + rspamd_tokenize_meta_words(task); } struct rspamd_message * -rspamd_message_ref (struct rspamd_message *msg) +rspamd_message_ref(struct rspamd_message *msg) { - REF_RETAIN (msg); + REF_RETAIN(msg); return msg; } -void rspamd_message_unref (struct rspamd_message *msg) +void rspamd_message_unref(struct rspamd_message *msg) { if (msg) { - REF_RELEASE (msg); + REF_RELEASE(msg); } } -void rspamd_message_update_digest (struct rspamd_message *msg, - const void *input, gsize len) +void rspamd_message_update_digest(struct rspamd_message *msg, + const void *input, gsize len) { guint64 n[2]; /* Sanity */ - G_STATIC_ASSERT (sizeof (n) == sizeof (msg->digest)); + G_STATIC_ASSERT(sizeof(n) == sizeof(msg->digest)); - memcpy (n, msg->digest, sizeof (msg->digest)); - n[0] = t1ha2_atonce128 (&n[1], input, len, n[0]); - memcpy (msg->digest, n, sizeof (msg->digest)); + memcpy(n, msg->digest, sizeof(msg->digest)); + n[0] = t1ha2_atonce128(&n[1], input, len, n[0]); + memcpy(msg->digest, n, sizeof(msg->digest)); } diff --git a/src/libmime/message.h b/src/libmime/message.h index d5329efa7..52dedaba7 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -20,7 +20,7 @@ #include <unicode/uchar.h> #include <unicode/utext.h> -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -48,7 +48,7 @@ enum rspamd_mime_part_type { #define IS_PART_MULTIPART(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MULTIPART)) #define IS_PART_TEXT(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_TEXT)) -#define 
IS_PART_MESSAGE(part) ((part) &&((part)->part_type == RSPAMD_MIME_PART_MESSAGE)) +#define IS_PART_MESSAGE(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MESSAGE)) enum rspamd_cte { RSPAMD_CTE_UNKNOWN = 0, @@ -135,16 +135,16 @@ struct rspamd_mime_text_part { rspamd_ftok_t parsed; /* decoded from mime encodings */ /* UTF8 content */ - rspamd_ftok_t utf_content; /* utf8 encoded processed content */ - GByteArray *utf_raw_content; /* utf raw content */ + rspamd_ftok_t utf_content; /* utf8 encoded processed content */ + GByteArray *utf_raw_content; /* utf raw content */ GByteArray *utf_stripped_content; /* utf content with no newlines */ - GArray *normalized_hashes; /* Array of guint64 */ - GArray *utf_words; /* Array of rspamd_stat_token_t */ - UText utf_stripped_text; /* Used by libicu to represent the utf8 content */ + GArray *normalized_hashes; /* Array of guint64 */ + GArray *utf_words; /* Array of rspamd_stat_token_t */ + UText utf_stripped_text; /* Used by libicu to represent the utf8 content */ - GPtrArray *newlines; /**< positions of newlines in text, relative to content*/ + GPtrArray *newlines; /**< positions of newlines in text, relative to content*/ void *html; - GList *exceptions; /**< list of offsets of urls */ + GList *exceptions; /**< list of offsets of urls */ struct rspamd_mime_part *mime_part; guint flags; @@ -171,38 +171,36 @@ struct rspamd_message { const gchar *message_id; gchar *subject; - GPtrArray *parts; /**< list of parsed parts */ - GPtrArray *text_parts; /**< list of text parts */ + GPtrArray *parts; /**< list of parsed parts */ + GPtrArray *text_parts; /**< list of text parts */ struct rspamd_message_raw_headers_content raw_headers_content; - void *received_headers; /**< list of received headers */ - khash_t (rspamd_url_hash) *urls; - struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */ - struct rspamd_mime_header *headers_order; /**< order of raw headers */ + void *received_headers; /**< list of received 
headers */ + khash_t(rspamd_url_hash) * urls; + struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */ + struct rspamd_mime_header *headers_order; /**< order of raw headers */ struct rspamd_task *task; GPtrArray *rcpt_mime; GPtrArray *from_mime; guchar digest[16]; - enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ + enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */ ref_entry_t ref; }; #define MESSAGE_FIELD(task, field) ((task)->message->field) -#define MESSAGE_FIELD_CHECK(task, field) ((task)->message ? \ - (task)->message->field : \ - (__typeof__((task)->message->field))NULL) +#define MESSAGE_FIELD_CHECK(task, field) ((task)->message ? (task)->message->field : (__typeof__((task)->message->field)) NULL) /** * Parse and pre-process mime message * @param task worker_task object * @return */ -gboolean rspamd_message_parse (struct rspamd_task *task); +gboolean rspamd_message_parse(struct rspamd_task *task); /** * Process content in task (e.g. 
HTML parsing) * @param task */ -void rspamd_message_process (struct rspamd_task *task); +void rspamd_message_process(struct rspamd_task *task); /** @@ -210,20 +208,20 @@ void rspamd_message_process (struct rspamd_task *task); * @param str * @return */ -enum rspamd_cte rspamd_cte_from_string (const gchar *str); +enum rspamd_cte rspamd_cte_from_string(const gchar *str); /** * Converts cte to string * @param ct * @return */ -const gchar *rspamd_cte_to_string (enum rspamd_cte ct); +const gchar *rspamd_cte_to_string(enum rspamd_cte ct); -struct rspamd_message* rspamd_message_new (struct rspamd_task *task); +struct rspamd_message *rspamd_message_new(struct rspamd_task *task); -struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg); +struct rspamd_message *rspamd_message_ref(struct rspamd_message *msg); -void rspamd_message_unref (struct rspamd_message *msg); +void rspamd_message_unref(struct rspamd_message *msg); /** * Updates digest of the message if modified @@ -231,10 +229,10 @@ void rspamd_message_unref (struct rspamd_message *msg); * @param input * @param len */ -void rspamd_message_update_digest (struct rspamd_message *msg, - const void *input, gsize len); +void rspamd_message_update_digest(struct rspamd_message *msg, + const void *input, gsize len); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index a3467fc91..48a97a4af 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -54,23 +54,22 @@ struct rspamd_charset_substitution { static GHashTable *sub_hash = NULL; static const UChar iso_8859_16_map[] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, - 0x0161, 0x00A9, 0x0218, 
0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, - 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, - 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, - 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, - 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, - 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF -}; + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, + 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, + 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, + 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, + 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, + 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, + 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF}; struct rspamd_charset_converter { gchar *canon_name; @@ -82,37 +81,37 @@ struct rspamd_charset_converter { }; static GQuark -rspamd_charset_conv_error_quark (void) +rspamd_charset_conv_error_quark(void) { - return 
g_quark_from_static_string ("charset conversion error"); + return g_quark_from_static_string("charset conversion error"); } static void -rspamd_converter_dtor (gpointer p) +rspamd_converter_dtor(gpointer p) { - struct rspamd_charset_converter *c = (struct rspamd_charset_converter *)p; + struct rspamd_charset_converter *c = (struct rspamd_charset_converter *) p; if (!c->is_internal) { - ucnv_close (c->d.conv); + ucnv_close(c->d.conv); } - g_free (c->canon_name); - g_free (c); + g_free(c->canon_name); + g_free(c); } int32_t -rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv, - UChar *dest, - int32_t destCapacity, - const char *src, - int32_t srcLength, - UErrorCode *pErrorCode) +rspamd_converter_to_uchars(struct rspamd_charset_converter *cnv, + UChar *dest, + int32_t destCapacity, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode) { if (!cnv->is_internal) { - return ucnv_toUChars (cnv->d.conv, - dest, destCapacity, - src, srcLength, - pErrorCode); + return ucnv_toUChars(cnv->d.conv, + dest, destCapacity, + src, srcLength, + pErrorCode); } else { UChar *d = dest, *dend = dest + destCapacity; @@ -120,13 +119,13 @@ rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv, while (p < end && d < dend) { if (*p <= 127) { - *d++ = (UChar)*p; + *d++ = (UChar) *p; } else { *d++ = cnv->d.cnv_table[*p - 128]; } - p ++; + p++; } return d - dest; @@ -135,19 +134,19 @@ rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv, struct rspamd_charset_converter * -rspamd_mime_get_converter_cached (const gchar *enc, - rspamd_mempool_t *pool, - gboolean is_canon, - UErrorCode *err) +rspamd_mime_get_converter_cached(const gchar *enc, + rspamd_mempool_t *pool, + gboolean is_canon, + UErrorCode *err) { const gchar *canon_name; static rspamd_lru_hash_t *cache; struct rspamd_charset_converter *conv; if (cache == NULL) { - cache = rspamd_lru_hash_new_full (RSPAMD_CHARSET_CACHE_SIZE, NULL, - rspamd_converter_dtor, rspamd_str_hash, - 
rspamd_str_equal); + cache = rspamd_lru_hash_new_full(RSPAMD_CHARSET_CACHE_SIZE, NULL, + rspamd_converter_dtor, rspamd_str_hash, + rspamd_str_equal); } if (enc == NULL) { @@ -157,8 +156,8 @@ rspamd_mime_get_converter_cached (const gchar *enc, if (!is_canon) { rspamd_ftok_t cset_tok; - RSPAMD_FTOK_FROM_STR (&cset_tok, enc); - canon_name = rspamd_mime_detect_charset (&cset_tok, pool); + RSPAMD_FTOK_FROM_STR(&cset_tok, enc); + canon_name = rspamd_mime_detect_charset(&cset_tok, pool); } else { canon_name = enc; @@ -168,38 +167,38 @@ rspamd_mime_get_converter_cached (const gchar *enc, return NULL; } - conv = rspamd_lru_hash_lookup (cache, (gpointer)canon_name, 0); + conv = rspamd_lru_hash_lookup(cache, (gpointer) canon_name, 0); if (conv == NULL) { - if (!(strcmp (canon_name, "ISO-8859-16") == 0 || - strcmp (canon_name, "latin10") == 0 || - strcmp (canon_name, "iso-ir-226") == 0)) { - conv = g_malloc0 (sizeof (*conv)); - conv->d.conv = ucnv_open (canon_name, err); - conv->canon_name = g_strdup (canon_name); + if (!(strcmp(canon_name, "ISO-8859-16") == 0 || + strcmp(canon_name, "latin10") == 0 || + strcmp(canon_name, "iso-ir-226") == 0)) { + conv = g_malloc0(sizeof(*conv)); + conv->d.conv = ucnv_open(canon_name, err); + conv->canon_name = g_strdup(canon_name); if (conv->d.conv != NULL) { - ucnv_setToUCallBack (conv->d.conv, - UCNV_TO_U_CALLBACK_SUBSTITUTE, - NULL, - NULL, - NULL, - err); - rspamd_lru_hash_insert (cache, conv->canon_name, conv, 0, 0); + ucnv_setToUCallBack(conv->d.conv, + UCNV_TO_U_CALLBACK_SUBSTITUTE, + NULL, + NULL, + NULL, + err); + rspamd_lru_hash_insert(cache, conv->canon_name, conv, 0, 0); } else { - g_free (conv); + g_free(conv); conv = NULL; } } else { /* ISO-8859-16 */ - conv = g_malloc0 (sizeof (*conv)); + conv = g_malloc0(sizeof(*conv)); conv->is_internal = TRUE; conv->d.cnv_table = iso_8859_16_map; - conv->canon_name = g_strdup (canon_name); + conv->canon_name = g_strdup(canon_name); - rspamd_lru_hash_insert (cache, conv->canon_name, conv, 0, 
0); + rspamd_lru_hash_insert(cache, conv->canon_name, conv, 0, 0); } } @@ -207,19 +206,19 @@ rspamd_mime_get_converter_cached (const gchar *enc, } static void -rspamd_mime_encoding_substitute_init (void) +rspamd_mime_encoding_substitute_init(void) { guint i; - sub_hash = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + sub_hash = g_hash_table_new(rspamd_strcase_hash, rspamd_strcase_equal); - for (i = 0; i < G_N_ELEMENTS (sub); i ++) { - g_hash_table_insert (sub_hash, (void *)sub[i].input, (void *)&sub[i]); + for (i = 0; i < G_N_ELEMENTS(sub); i++) { + g_hash_table_insert(sub_hash, (void *) sub[i].input, (void *) &sub[i]); } } static void -rspamd_charset_normalize (gchar *in) +rspamd_charset_normalize(gchar *in) { /* * This is a simple routine to validate input charset @@ -231,26 +230,26 @@ rspamd_charset_normalize (gchar *in) begin = in; - while (*begin && !g_ascii_isalnum (*begin)) { - begin ++; + while (*begin && !g_ascii_isalnum(*begin)) { + begin++; changed = TRUE; } - end = begin + strlen (begin) - 1; + end = begin + strlen(begin) - 1; - while (end > begin && !g_ascii_isalnum (*end)) { - end --; + while (end > begin && !g_ascii_isalnum(*end)) { + end--; changed = TRUE; } if (changed) { - memmove (in, begin, end - begin + 2); + memmove(in, begin, end - begin + 2); *(end + 1) = '\0'; } } const gchar * -rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool) +rspamd_mime_detect_charset(const rspamd_ftok_t *in, rspamd_mempool_t *pool) { gchar *ret = NULL, *h, *t; struct rspamd_charset_substitution *s; @@ -259,27 +258,27 @@ rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool) UErrorCode uc_err = U_ZERO_ERROR; if (sub_hash == NULL) { - rspamd_mime_encoding_substitute_init (); + rspamd_mime_encoding_substitute_init(); } /* Fast path */ - RSPAMD_FTOK_ASSIGN (&utf8_tok, "utf-8"); + RSPAMD_FTOK_ASSIGN(&utf8_tok, "utf-8"); - if (rspamd_ftok_casecmp (in, &utf8_tok) == 0) { + if (rspamd_ftok_casecmp(in, &utf8_tok) 
== 0) { return UTF8_CHARSET; } - RSPAMD_FTOK_ASSIGN (&utf8_tok, "utf8"); + RSPAMD_FTOK_ASSIGN(&utf8_tok, "utf8"); - if (rspamd_ftok_casecmp (in, &utf8_tok) == 0) { + if (rspamd_ftok_casecmp(in, &utf8_tok) == 0) { return UTF8_CHARSET; } - ret = rspamd_mempool_ftokdup (pool, in); - rspamd_charset_normalize (ret); + ret = rspamd_mempool_ftokdup(pool, in); + rspamd_charset_normalize(ret); - if ((in->len > 3 && rspamd_lc_cmp (in->begin, "cp-", 3) == 0) || - (in->len > 4 && (rspamd_lc_cmp (in->begin, "ibm-", 4) == 0))) { + if ((in->len > 3 && rspamd_lc_cmp(in->begin, "cp-", 3) == 0) || + (in->len > 4 && (rspamd_lc_cmp(in->begin, "ibm-", 4) == 0))) { /* Try to remove '-' chars from encoding: e.g. CP-100 to CP100 */ h = ret; t = ret; @@ -289,43 +288,43 @@ rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool) *t++ = *h; } - h ++; + h++; } *t = '\0'; } - s = g_hash_table_lookup (sub_hash, ret); + s = g_hash_table_lookup(sub_hash, ret); if (s) { - ret = (char *)s->canon; + ret = (char *) s->canon; } /* Try different aliases */ - cset = ucnv_getCanonicalName (ret, "MIME", &uc_err); + cset = ucnv_getCanonicalName(ret, "MIME", &uc_err); if (cset == NULL) { uc_err = U_ZERO_ERROR; - cset = ucnv_getCanonicalName (ret, "IANA", &uc_err); + cset = ucnv_getCanonicalName(ret, "IANA", &uc_err); } if (cset == NULL) { uc_err = U_ZERO_ERROR; - cset = ucnv_getCanonicalName (ret, "", &uc_err); + cset = ucnv_getCanonicalName(ret, "", &uc_err); } if (cset == NULL) { uc_err = U_ZERO_ERROR; - cset = ucnv_getAlias (ret, 0, &uc_err); + cset = ucnv_getAlias(ret, 0, &uc_err); } return cset; } gchar * -rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool, - gchar *input, gsize len, const gchar *in_enc, - gsize *olen, GError **err) +rspamd_mime_text_to_utf8(rspamd_mempool_t *pool, + gchar *input, gsize len, const gchar *in_enc, + gsize *olen, GError **err) { gchar *d; gint32 r, clen, dlen; @@ -337,12 +336,12 @@ rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool, rspamd_ftok_t cset_tok; 
/* Check if already utf8 */ - RSPAMD_FTOK_FROM_STR (&cset_tok, in_enc); + RSPAMD_FTOK_FROM_STR(&cset_tok, in_enc); - if (rspamd_mime_charset_utf_check (&cset_tok, input, len, - FALSE)) { - d = rspamd_mempool_alloc (pool, len); - memcpy (d, input, len); + if (rspamd_mime_charset_utf_check(&cset_tok, input, len, + FALSE)) { + d = rspamd_mempool_alloc(pool, len); + memcpy(d, input, len); if (olen) { *olen = len; } @@ -350,48 +349,48 @@ rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool, return d; } - conv = rspamd_mime_get_converter_cached (in_enc, pool, TRUE, &uc_err); - utf8_converter = rspamd_get_utf8_converter (); + conv = rspamd_mime_get_converter_cached(in_enc, pool, TRUE, &uc_err); + utf8_converter = rspamd_get_utf8_converter(); if (conv == NULL) { - g_set_error (err, rspamd_charset_conv_error_quark(), EINVAL, - "cannot open converter for %s: %s", - in_enc, u_errorName (uc_err)); + g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL, + "cannot open converter for %s: %s", + in_enc, u_errorName(uc_err)); return NULL; } - tmp_buf = g_new (UChar, len + 1); + tmp_buf = g_new(UChar, len + 1); uc_err = U_ZERO_ERROR; - r = rspamd_converter_to_uchars (conv, tmp_buf, len + 1, input, len, &uc_err); + r = rspamd_converter_to_uchars(conv, tmp_buf, len + 1, input, len, &uc_err); - if (!U_SUCCESS (uc_err)) { - g_set_error (err, rspamd_charset_conv_error_quark(), EINVAL, + if (!U_SUCCESS(uc_err)) { + g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL, "cannot convert data to unicode from %s: %s", - in_enc, u_errorName (uc_err)); - g_free (tmp_buf); + in_enc, u_errorName(uc_err)); + g_free(tmp_buf); return NULL; } /* Now, convert to utf8 */ - clen = ucnv_getMaxCharSize (utf8_converter); - dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen); - d = rspamd_mempool_alloc (pool, dlen); - r = ucnv_fromUChars (utf8_converter, d, dlen, tmp_buf, r, &uc_err); + clen = ucnv_getMaxCharSize(utf8_converter); + dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen); + d = 
rspamd_mempool_alloc(pool, dlen); + r = ucnv_fromUChars(utf8_converter, d, dlen, tmp_buf, r, &uc_err); - if (!U_SUCCESS (uc_err)) { - g_set_error (err, rspamd_charset_conv_error_quark(), EINVAL, - "cannot convert data from unicode from %s: %s", - in_enc, u_errorName (uc_err)); - g_free (tmp_buf); + if (!U_SUCCESS(uc_err)) { + g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL, + "cannot convert data from unicode from %s: %s", + in_enc, u_errorName(uc_err)); + g_free(tmp_buf); return NULL; } - msg_debug_pool ("converted from %s to UTF-8 inlen: %z, outlen: %d", - in_enc, len, r); - g_free (tmp_buf); + msg_debug_pool("converted from %s to UTF-8 inlen: %z, outlen: %d", + in_enc, len, r); + g_free(tmp_buf); if (olen) { *olen = r; @@ -401,11 +400,11 @@ rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool, } static gboolean -rspamd_mime_text_part_utf8_convert (struct rspamd_task *task, - struct rspamd_mime_text_part *text_part, - GByteArray *input, - const gchar *charset, - GError **err) +rspamd_mime_text_part_utf8_convert(struct rspamd_task *task, + struct rspamd_mime_text_part *text_part, + GByteArray *input, + const gchar *charset, + GError **err) { gchar *d; gint32 r, clen, dlen, uc_len; @@ -414,76 +413,76 @@ rspamd_mime_text_part_utf8_convert (struct rspamd_task *task, UConverter *utf8_converter; struct rspamd_charset_converter *conv; - conv = rspamd_mime_get_converter_cached (charset, task->task_pool, - TRUE, &uc_err); - utf8_converter = rspamd_get_utf8_converter (); + conv = rspamd_mime_get_converter_cached(charset, task->task_pool, + TRUE, &uc_err); + utf8_converter = rspamd_get_utf8_converter(); if (conv == NULL) { - g_set_error (err, rspamd_charset_conv_error_quark(), EINVAL, - "cannot open converter for %s: %s", - charset, u_errorName (uc_err)); + g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL, + "cannot open converter for %s: %s", + charset, u_errorName(uc_err)); return FALSE; } - tmp_buf = g_new (UChar, input->len + 1); + tmp_buf = 
g_new(UChar, input->len + 1); uc_err = U_ZERO_ERROR; - uc_len = rspamd_converter_to_uchars (conv, - tmp_buf, - input->len + 1, - input->data, - input->len, - &uc_err); - - if (!U_SUCCESS (uc_err)) { - g_set_error (err, rspamd_charset_conv_error_quark(), EINVAL, - "cannot convert data to unicode from %s: %s", - charset, u_errorName (uc_err)); - g_free (tmp_buf); + uc_len = rspamd_converter_to_uchars(conv, + tmp_buf, + input->len + 1, + input->data, + input->len, + &uc_err); + + if (!U_SUCCESS(uc_err)) { + g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL, + "cannot convert data to unicode from %s: %s", + charset, u_errorName(uc_err)); + g_free(tmp_buf); return FALSE; } /* Now, convert to utf8 */ - clen = ucnv_getMaxCharSize (utf8_converter); - dlen = UCNV_GET_MAX_BYTES_FOR_STRING (uc_len, clen); - d = rspamd_mempool_alloc (task->task_pool, dlen); - r = ucnv_fromUChars (utf8_converter, d, dlen, - tmp_buf, uc_len, &uc_err); - - if (!U_SUCCESS (uc_err)) { - g_set_error (err, rspamd_charset_conv_error_quark(), EINVAL, - "cannot convert data from unicode from %s: %s", - charset, u_errorName (uc_err)); - g_free (tmp_buf); + clen = ucnv_getMaxCharSize(utf8_converter); + dlen = UCNV_GET_MAX_BYTES_FOR_STRING(uc_len, clen); + d = rspamd_mempool_alloc(task->task_pool, dlen); + r = ucnv_fromUChars(utf8_converter, d, dlen, + tmp_buf, uc_len, &uc_err); + + if (!U_SUCCESS(uc_err)) { + g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL, + "cannot convert data from unicode from %s: %s", + charset, u_errorName(uc_err)); + g_free(tmp_buf); return FALSE; } if (text_part->mime_part && text_part->mime_part->ct) { - msg_info_task ("converted text part from %s ('%T' announced) to UTF-8 inlen: %d, outlen: %d (%d UTF16 chars)", - charset, &text_part->mime_part->ct->charset, input->len, r, uc_len); + msg_info_task("converted text part from %s ('%T' announced) to UTF-8 inlen: %d, outlen: %d (%d UTF16 chars)", + charset, &text_part->mime_part->ct->charset, input->len, r, 
uc_len); } else { - msg_info_task ("converted text part from %s (no charset announced) to UTF-8 inlen: %d, " - "outlen: %d (%d UTF16 chars)", - charset, input->len, r, uc_len); + msg_info_task("converted text part from %s (no charset announced) to UTF-8 inlen: %d, " + "outlen: %d (%d UTF16 chars)", + charset, input->len, r, uc_len); } - text_part->utf_raw_content = rspamd_mempool_alloc (task->task_pool, - sizeof (*text_part->utf_raw_content) + sizeof (gpointer) * 4); + text_part->utf_raw_content = rspamd_mempool_alloc(task->task_pool, + sizeof(*text_part->utf_raw_content) + sizeof(gpointer) * 4); text_part->utf_raw_content->data = d; text_part->utf_raw_content->len = r; - g_free (tmp_buf); + g_free(tmp_buf); return TRUE; } gboolean -rspamd_mime_to_utf8_byte_array (GByteArray *in, - GByteArray *out, - rspamd_mempool_t *pool, - const gchar *enc) +rspamd_mime_to_utf8_byte_array(GByteArray *in, + GByteArray *out, + rspamd_mempool_t *pool, + const gchar *enc) { gint32 r, clen, dlen; UChar *tmp_buf; @@ -498,9 +497,9 @@ rspamd_mime_to_utf8_byte_array (GByteArray *in, if (enc == NULL) { /* Assume utf ? 
*/ - if (rspamd_fast_utf8_validate (in->data, in->len) == 0) { - g_byte_array_set_size (out, in->len); - memcpy (out->data, in->data, out->len); + if (rspamd_fast_utf8_validate(in->data, in->len) == 0) { + g_byte_array_set_size(out, in->len); + memcpy(out->data, in->data, out->len); return TRUE; } @@ -510,55 +509,54 @@ rspamd_mime_to_utf8_byte_array (GByteArray *in, } } - RSPAMD_FTOK_FROM_STR (&charset_tok, enc); + RSPAMD_FTOK_FROM_STR(&charset_tok, enc); - if (rspamd_mime_charset_utf_check (&charset_tok, (gchar *)in->data, in->len, - FALSE)) { - g_byte_array_set_size (out, in->len); - memcpy (out->data, in->data, out->len); + if (rspamd_mime_charset_utf_check(&charset_tok, (gchar *) in->data, in->len, + FALSE)) { + g_byte_array_set_size(out, in->len); + memcpy(out->data, in->data, out->len); return TRUE; } - utf8_converter = rspamd_get_utf8_converter (); - conv = rspamd_mime_get_converter_cached (enc, pool, TRUE, &uc_err); + utf8_converter = rspamd_get_utf8_converter(); + conv = rspamd_mime_get_converter_cached(enc, pool, TRUE, &uc_err); if (conv == NULL) { return FALSE; } - tmp_buf = g_new (UChar, in->len + 1); + tmp_buf = g_new(UChar, in->len + 1); uc_err = U_ZERO_ERROR; - r = rspamd_converter_to_uchars (conv, - tmp_buf, in->len + 1, - in->data, in->len, &uc_err); + r = rspamd_converter_to_uchars(conv, + tmp_buf, in->len + 1, + in->data, in->len, &uc_err); - if (!U_SUCCESS (uc_err)) { - g_free (tmp_buf); + if (!U_SUCCESS(uc_err)) { + g_free(tmp_buf); return FALSE; } /* Now, convert to utf8 */ - clen = ucnv_getMaxCharSize (utf8_converter); - dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen); - g_byte_array_set_size (out, dlen); - r = ucnv_fromUChars (utf8_converter, out->data, dlen, tmp_buf, r, &uc_err); + clen = ucnv_getMaxCharSize(utf8_converter); + dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen); + g_byte_array_set_size(out, dlen); + r = ucnv_fromUChars(utf8_converter, out->data, dlen, tmp_buf, r, &uc_err); - if (!U_SUCCESS (uc_err)) { - g_free (tmp_buf); + if 
(!U_SUCCESS(uc_err)) { + g_free(tmp_buf); return FALSE; } - g_free (tmp_buf); + g_free(tmp_buf); out->len = r; return TRUE; } -void -rspamd_mime_charset_utf_enforce (gchar *in, gsize len) +void rspamd_mime_charset_utf_enforce(gchar *in, gsize len) { gchar *p, *end; goffset err_offset; @@ -568,25 +566,25 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len) p = in; end = in + len; - while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate (p, len)) > 0) { - err_offset --; /* As it returns it 1 indexed */ + while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate(p, len)) > 0) { + err_offset--; /* As it returns it 1 indexed */ gint32 cur_offset = err_offset; while (cur_offset < len) { gint32 tmp = cur_offset; - U8_NEXT (p, cur_offset, len, uc); + U8_NEXT(p, cur_offset, len, uc); if (uc > 0) { /* Fill string between err_offset and tmp with `?` character */ - memset (p + err_offset, '?', tmp - err_offset); + memset(p + err_offset, '?', tmp - err_offset); break; } } if (uc < 0) { /* Fill till the end */ - memset (p + err_offset, '?', len - err_offset); + memset(p + err_offset, '?', len - err_offset); break; } @@ -596,23 +594,23 @@ rspamd_mime_charset_utf_enforce (gchar *in, gsize len) } const char * -rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen, - bool check_utf8) +rspamd_mime_charset_find_by_content(const gchar *in, gsize inlen, + bool check_utf8) { int nconsumed; bool is_reliable; const gchar *ced_name; if (check_utf8) { - if (rspamd_fast_utf8_validate (in, inlen) == 0) { + if (rspamd_fast_utf8_validate(in, inlen) == 0) { return UTF8_CHARSET; } } - ced_name = ced_encoding_detect (in, inlen, NULL, NULL, - NULL, 0, CED_EMAIL_CORPUS, - false, &nconsumed, &is_reliable); + ced_name = ced_encoding_detect(in, inlen, NULL, NULL, + NULL, 0, CED_EMAIL_CORPUS, + false, &nconsumed, &is_reliable); if (ced_name) { @@ -623,28 +621,28 @@ rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen, } static const char * 
-rspamd_mime_charset_find_by_content_maybe_split (const gchar *in, gsize inlen) +rspamd_mime_charset_find_by_content_maybe_split(const gchar *in, gsize inlen) { if (inlen < RSPAMD_CHARSET_MAX_CONTENT * 3) { - return rspamd_mime_charset_find_by_content (in, inlen, false); + return rspamd_mime_charset_find_by_content(in, inlen, false); } else { const gchar *c1, *c2, *c3; - c1 = rspamd_mime_charset_find_by_content (in, RSPAMD_CHARSET_MAX_CONTENT, false); - c2 = rspamd_mime_charset_find_by_content (in + inlen / 2, - RSPAMD_CHARSET_MAX_CONTENT, false); - c3 = rspamd_mime_charset_find_by_content (in + inlen - RSPAMD_CHARSET_MAX_CONTENT, - RSPAMD_CHARSET_MAX_CONTENT, false); + c1 = rspamd_mime_charset_find_by_content(in, RSPAMD_CHARSET_MAX_CONTENT, false); + c2 = rspamd_mime_charset_find_by_content(in + inlen / 2, + RSPAMD_CHARSET_MAX_CONTENT, false); + c3 = rspamd_mime_charset_find_by_content(in + inlen - RSPAMD_CHARSET_MAX_CONTENT, + RSPAMD_CHARSET_MAX_CONTENT, false); /* 7bit stuff */ - if (c1 && strcmp (c1, "US-ASCII") == 0) { + if (c1 && strcmp(c1, "US-ASCII") == 0) { c1 = NULL; /* Invalid - we have 8 bit there */ } - if (c2 && strcmp (c2, "US-ASCII") == 0) { + if (c2 && strcmp(c2, "US-ASCII") == 0) { c2 = NULL; /* Invalid - we have 8 bit there */ } - if (c3 && strcmp (c3, "US-ASCII") == 0) { + if (c3 && strcmp(c3, "US-ASCII") == 0) { c3 = NULL; /* Invalid - we have 8 bit there */ } @@ -679,45 +677,45 @@ rspamd_mime_charset_find_by_content_maybe_split (const gchar *in, gsize inlen) } gboolean -rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, - gchar *in, gsize len, gboolean content_check) +rspamd_mime_charset_utf_check(rspamd_ftok_t *charset, + gchar *in, gsize len, gboolean content_check) { const gchar *real_charset; if (utf_compatible_re == NULL) { - utf_compatible_re = rspamd_regexp_new ( - "^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:ansi.*)|(?:CSASCII)$", - "i", NULL); + utf_compatible_re = rspamd_regexp_new( + 
"^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:ansi.*)|(?:CSASCII)$", + "i", NULL); } if (charset->len == 0 || - rspamd_regexp_match (utf_compatible_re, - charset->begin, charset->len, TRUE)) { + rspamd_regexp_match(utf_compatible_re, + charset->begin, charset->len, TRUE)) { /* * In case of UTF8 charset we still can check the content to find * corner cases */ if (content_check) { - if (rspamd_fast_utf8_validate (in, len) != 0) { + if (rspamd_fast_utf8_validate(in, len) != 0) { real_charset = rspamd_mime_charset_find_by_content_maybe_split(in, len); if (real_charset) { - if (rspamd_regexp_match (utf_compatible_re, - real_charset, strlen (real_charset), TRUE)) { - RSPAMD_FTOK_ASSIGN (charset, UTF8_CHARSET); + if (rspamd_regexp_match(utf_compatible_re, + real_charset, strlen(real_charset), TRUE)) { + RSPAMD_FTOK_ASSIGN(charset, UTF8_CHARSET); return TRUE; } else { charset->begin = real_charset; - charset->len = strlen (real_charset); + charset->len = strlen(real_charset); return FALSE; } } - rspamd_mime_charset_utf_enforce (in, len); + rspamd_mime_charset_utf_enforce(in, len); } } @@ -727,9 +725,8 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, return FALSE; } -void -rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, - struct rspamd_mime_text_part *text_part) +void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task, + struct rspamd_mime_text_part *text_part) { GError *err = NULL; const gchar *charset = NULL; @@ -738,21 +735,21 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, rspamd_ftok_t charset_tok; struct rspamd_mime_part *part = text_part->mime_part; - if (rspamd_str_has_8bit (text_part->raw.begin, text_part->raw.len)) { + if (rspamd_str_has_8bit(text_part->raw.begin, text_part->raw.len)) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW; } /* Allocate copy storage */ - part_content = g_byte_array_sized_new (text_part->parsed.len); - memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len); + 
part_content = g_byte_array_sized_new(text_part->parsed.len); + memcpy(part_content->data, text_part->parsed.begin, text_part->parsed.len); part_content->len = text_part->parsed.len; - rspamd_mempool_notify_alloc (task->task_pool, - part_content->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)g_byte_array_unref, part_content); + rspamd_mempool_notify_alloc(task->task_pool, + part_content->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) g_byte_array_unref, part_content); - if (rspamd_str_has_8bit (text_part->parsed.begin, text_part->parsed.len)) { - if (rspamd_fast_utf8_validate (text_part->parsed.begin, text_part->parsed.len) == 0) { + if (rspamd_str_has_8bit(text_part->parsed.begin, text_part->parsed.len)) { + if (rspamd_fast_utf8_validate(text_part->parsed.begin, text_part->parsed.len) == 0) { /* Valid UTF, likely all good */ need_charset_heuristic = FALSE; valid_utf8 = TRUE; @@ -770,18 +767,18 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, if (part->ct->charset.len == 0) { if (need_charset_heuristic) { - charset = rspamd_mime_charset_find_by_content_maybe_split (text_part->parsed.begin, - text_part->parsed.len); + charset = rspamd_mime_charset_find_by_content_maybe_split(text_part->parsed.begin, + text_part->parsed.len); if (charset != NULL) { - msg_info_task ("detected charset %s", charset); + msg_info_task("detected charset %s", charset); } checked = TRUE; text_part->real_charset = charset; } else if (valid_utf8) { - SET_PART_UTF (text_part); + SET_PART_UTF(text_part); text_part->utf_raw_content = part_content; text_part->real_charset = UTF8_CHARSET; @@ -789,15 +786,15 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, } } else { - charset = rspamd_mime_detect_charset (&part->ct->charset, - task->task_pool); + charset = rspamd_mime_detect_charset(&part->ct->charset, + task->task_pool); if (charset == NULL) { /* We don't know the real charset but can try 
heuristic */ if (need_charset_heuristic) { - charset = rspamd_mime_charset_find_by_content_maybe_split (part_content->data, - part_content->len); - msg_info_task ("detected charset: %s", charset); + charset = rspamd_mime_charset_find_by_content_maybe_split(part_content->data, + part_content->len); + msg_info_task("detected charset: %s", charset); checked = TRUE; text_part->real_charset = charset; } @@ -809,7 +806,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, else { text_part->real_charset = charset; - if (strcmp (charset, UTF8_CHARSET) != 0) { + if (strcmp(charset, UTF8_CHARSET) != 0) { /* * We have detected some charset, but we don't know which one, * so we need to reset valid utf8 flag and enforce it later @@ -820,21 +817,21 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, } if (text_part->real_charset == NULL) { - msg_info_task ("<%s>: has invalid charset; original charset: %T; Content-Type: \"%s\"", - MESSAGE_FIELD_CHECK (task, message_id), &part->ct->charset, - part->ct->cpy); - SET_PART_RAW (text_part); + msg_info_task("<%s>: has invalid charset; original charset: %T; Content-Type: \"%s\"", + MESSAGE_FIELD_CHECK(task, message_id), &part->ct->charset, + part->ct->cpy); + SET_PART_RAW(text_part); text_part->utf_raw_content = part_content; return; } - RSPAMD_FTOK_FROM_STR (&charset_tok, charset); + RSPAMD_FTOK_FROM_STR(&charset_tok, charset); if (!valid_utf8) { - if (rspamd_mime_charset_utf_check (&charset_tok, part_content->data, - part_content->len, !checked)) { - SET_PART_UTF (text_part); + if (rspamd_mime_charset_utf_check(&charset_tok, part_content->data, + part_content->len, !checked)) { + SET_PART_UTF(text_part); text_part->utf_raw_content = part_content; text_part->real_charset = UTF8_CHARSET; @@ -843,25 +840,25 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, else { charset = charset_tok.begin; - if (!rspamd_mime_text_part_utf8_convert (task, text_part, - part_content, charset, &err)) { - 
msg_warn_task ("<%s>: cannot convert from %s to utf8: %s", - MESSAGE_FIELD (task, message_id), - charset, - err ? err->message : "unknown problem"); - SET_PART_RAW (text_part); - g_error_free (err); + if (!rspamd_mime_text_part_utf8_convert(task, text_part, + part_content, charset, &err)) { + msg_warn_task("<%s>: cannot convert from %s to utf8: %s", + MESSAGE_FIELD(task, message_id), + charset, + err ? err->message : "unknown problem"); + SET_PART_RAW(text_part); + g_error_free(err); text_part->utf_raw_content = part_content; return; } - SET_PART_UTF (text_part); + SET_PART_UTF(text_part); text_part->real_charset = charset; } } else { - SET_PART_UTF (text_part); + SET_PART_UTF(text_part); text_part->utf_raw_content = part_content; } } diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h index c7b387ef0..ff8129265 100644 --- a/src/libmime/mime_encoding.h +++ b/src/libmime/mime_encoding.h @@ -21,7 +21,7 @@ #include "fstring.h" #include <unicode/uchar.h> -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -36,8 +36,8 @@ struct rspamd_charset_converter; * @param in * @return */ -const gchar *rspamd_mime_detect_charset (const rspamd_ftok_t *in, - rspamd_mempool_t *pool); +const gchar *rspamd_mime_detect_charset(const rspamd_ftok_t *in, + rspamd_mempool_t *pool); /** * Convert text chunk to utf-8. Input encoding is substituted using @@ -52,9 +52,9 @@ const gchar *rspamd_mime_detect_charset (const rspamd_ftok_t *in, * @param err * @return */ -gchar *rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool, - gchar *input, gsize len, const gchar *in_enc, - gsize *olen, GError **err); +gchar *rspamd_mime_text_to_utf8(rspamd_mempool_t *pool, + gchar *input, gsize len, const gchar *in_enc, + gsize *olen, GError **err); /** * Converts data from `in` to `out`, @@ -67,10 +67,10 @@ gchar *rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool, * @param enc validated canonical charset name. 
If NULL, then utf8 check is done only * @return */ -gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in, - GByteArray *out, - rspamd_mempool_t *pool, - const gchar *enc); +gboolean rspamd_mime_to_utf8_byte_array(GByteArray *in, + GByteArray *out, + rspamd_mempool_t *pool, + const gchar *enc); /** * Maybe convert part to utf-8 @@ -78,8 +78,8 @@ gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in, * @param text_part * @return */ -void rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, - struct rspamd_mime_text_part *text_part); +void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task, + struct rspamd_mime_text_part *text_part); /** * Checks utf8 charset and normalize/validate utf8 string @@ -88,9 +88,9 @@ void rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, * @param len * @return */ -gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, - gchar *in, gsize len, - gboolean content_check); +gboolean rspamd_mime_charset_utf_check(rspamd_ftok_t *charset, + gchar *in, gsize len, + gboolean content_check); /** * Ensure that all characters in string are valid utf8 chars or replace them @@ -98,9 +98,9 @@ gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, * @param in * @param len */ -void rspamd_mime_charset_utf_enforce (gchar *in, gsize len); +void rspamd_mime_charset_utf_enforce(gchar *in, gsize len); - /** +/** * Gets cached converter * @param enc input encoding * @param pool pool to use for temporary normalisation @@ -108,11 +108,11 @@ void rspamd_mime_charset_utf_enforce (gchar *in, gsize len); * @param err output error * @return converter */ -struct rspamd_charset_converter *rspamd_mime_get_converter_cached ( - const gchar *enc, - rspamd_mempool_t *pool, - gboolean is_canon, - UErrorCode *err); +struct rspamd_charset_converter *rspamd_mime_get_converter_cached( + const gchar *enc, + rspamd_mempool_t *pool, + gboolean is_canon, + UErrorCode *err); /** * Performs charset->utf16 conversion @@ -125,12 
+125,12 @@ struct rspamd_charset_converter *rspamd_mime_get_converter_cached ( * @return */ gint32 -rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv, - UChar *dest, - gint32 destCapacity, - const char *src, - gint32 srcLength, - UErrorCode *pErrorCode); +rspamd_converter_to_uchars(struct rspamd_charset_converter *cnv, + UChar *dest, + gint32 destCapacity, + const char *src, + gint32 srcLength, + UErrorCode *pErrorCode); /** * Detect charset in text @@ -138,10 +138,10 @@ rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv, * @param inlen * @return detected charset name or NULL */ -const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen, - bool check_utf8); +const char *rspamd_mime_charset_find_by_content(const gchar *in, gsize inlen, + bool check_utf8); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/mime_encoding_list.h b/src/libmime/mime_encoding_list.h index f03f008df..b5fc5e138 100644 --- a/src/libmime/mime_encoding_list.h +++ b/src/libmime/mime_encoding_list.h @@ -17,1561 +17,1561 @@ #define SRC_LIBMIME_MIME_ENCODING_LIST_H_ static const struct rspamd_charset_substitution sub[] = { - { - .input = "iso-646-us", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "ansi_x3.4-1968", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "iso-ir-6", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "iso_646.irv:1991", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "ascii", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "iso646-us", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "us", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "ibm367", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = 
"cp367", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "csascii", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "ascii7", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "default", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "646", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "iso_646.irv:1983", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "iso969-us", - .canon = "ansi_x3.4-1986", - .flags = RSPAMD_CHARSET_FLAG_ASCII, - }, - { - .input = "tw-big5", - .canon = "big5", - .flags = 0, - }, - { - .input = "csbig5", - .canon = "big5", - .flags = 0, - }, - { - .input = "hkscs-big5", - .canon = "big5-hkscs", - .flags = 0, - }, - { - .input = "big5hk", - .canon = "big5-hkscs", - .flags = 0, - }, - { - .input = "big5-hkscs:unicode", - .canon = "big5-hkscs", - .flags = 0, - }, - { - .input = "extended_unix_code_packed_format_for_japanese", - .canon = "euc-jp", - .flags = 0, - }, - { - .input = "cseucpkdfmtjapanese", - .canon = "euc-jp", - .flags = 0, - }, - { - .input = "x-eucjp", - .canon = "euc-jp", - .flags = 0, - }, - { - .input = "x-euc-jp", - .canon = "euc-jp", - .flags = 0, - }, - { - .input = "unicode-1-1-utf-8", - .canon = "utf-8", - .flags = RSPAMD_CHARSET_FLAG_UTF, - }, - { - .input = "cseuckr", - .canon = "euc-kr", - .flags = 0, - }, - { - .input = "5601", - .canon = "euc-kr", - .flags = 0, - }, - { - .input = "ksc-5601", - .canon = "euc-kr", - .flags = 0, - }, - { - .input = "ksc-5601-1987", - .canon = "euc-kr", - .flags = 0, - }, - { - .input = "ksc-5601_1987", - .canon = "euc-kr", - .flags = 0, - }, - { - .input = "ksc5601", - .canon = "euc-kr", - .flags = 0, - }, - { - .input = "cns11643", - .canon = "euc-tw", - .flags = 0, - }, - { - .input = "ibm-euctw", - .canon = "euc-tw", - .flags = 0, - }, - { - .input = 
"gb-18030", - .canon = "gb18030", - .flags = 0, - }, - { - .input = "ibm1392", - .canon = "gb18030", - .flags = 0, - }, - { - .input = "ibm-1392", - .canon = "gb18030", - .flags = 0, - }, - { - .input = "gb18030-2000", - .canon = "gb18030", - .flags = 0, - }, - { - .input = "gb-2312", - .canon = "gb2312", - .flags = 0, - }, - { - .input = "csgb2312", - .canon = "gb2312", - .flags = 0, - }, - { - .input = "euc_cn", - .canon = "gb2312", - .flags = 0, - }, - { - .input = "euccn", - .canon = "gb2312", - .flags = 0, - }, - { - .input = "euc-cn", - .canon = "gb2312", - .flags = 0, - }, - { - .input = "gb-k", - .canon = "gbk", - .flags = 0, - }, - { - .input = "iso_8859-1:1987", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "iso-ir-100", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "iso_8859-1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "latin1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "l1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "ibm819", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "cp819", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "csisolatin1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "819", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "cp819", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "iso8859-1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "8859-1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "iso8859_1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "iso_8859_1", - .canon = "iso-8859-1", - .flags = 0, - }, - { - .input = "iso_8859-2:1987", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "iso-ir-101", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "iso_8859-2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "latin2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "l2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - 
.input = "csisolatin2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "912", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "cp912", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "ibm-912", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "ibm912", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "iso8859-2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "8859-2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "iso8859_2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "iso_8859_2", - .canon = "iso-8859-2", - .flags = 0, - }, - { - .input = "iso_8859-3:1988", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "iso-ir-109", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "iso_8859-3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "latin3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "l3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "csisolatin3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "913", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "cp913", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "ibm-913", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "ibm913", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "iso8859-3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "8859-3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "iso8859_3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "iso_8859_3", - .canon = "iso-8859-3", - .flags = 0, - }, - { - .input = "iso_8859-4:1988", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "iso-ir-110", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "iso_8859-4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "latin4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "l4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = 
"csisolatin4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "914", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "cp914", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "ibm-914", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "ibm914", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "iso8859-4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "8859-4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "iso8859_4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "iso_8859_4", - .canon = "iso-8859-4", - .flags = 0, - }, - { - .input = "iso_8859-5:1988", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "iso-ir-144", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "iso_8859-5", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "cyrillic", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "csisolatincyrillic", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "915", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "cp915", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "ibm-915", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "ibm915", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "iso8859-5", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "8859-5", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "iso8859_5", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "iso_8859_5", - .canon = "iso-8859-5", - .flags = 0, - }, - { - .input = "iso_8859-6:1987", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "iso-ir-127", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "iso_8859-6", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "ecma-114", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "asmo-708", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "arabic", - .canon = "iso-8859-6", - .flags = 0, - }, - { - 
.input = "csisolatinarabic", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "1089", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "cp1089", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "ibm-1089", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "ibm1089", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "iso8859-6", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "8859-6", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "iso8859_6", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "iso_8859_6", - .canon = "iso-8859-6", - .flags = 0, - }, - { - .input = "iso_8859-7:1987", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "iso-ir-126", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "iso_8859-7", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "elot_928", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "ecma-118", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "greek", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "greek8", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "csisolatingreek", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "813", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "cp813", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "ibm-813", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "ibm813", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "iso8859-7", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "8859-7", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "iso8859_7", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "iso_8859_7", - .canon = "iso-8859-7", - .flags = 0, - }, - { - .input = "iso_8859-8:1988", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "iso-ir-138", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "iso_8859-8", - .canon = "iso-8859-8", - .flags = 0, - }, 
- { - .input = "hebrew", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "csisolatinhebrew", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "916", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "cp916", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "ibm-916", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "ibm916", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "iso8859-8", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "8859-8", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "iso8859_8", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "iso_8859_8", - .canon = "iso-8859-8", - .flags = 0, - }, - { - .input = "iso_8859-9:1989", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "iso-ir-148", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "iso_8859-9", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "latin5", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "l5", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "csisolatin5", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "920", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "cp920", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "ibm-920", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "ibm920", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "iso8859-9", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "8859-9", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "iso8859_9", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "iso_8859_9", - .canon = "iso-8859-9", - .flags = 0, - }, - { - .input = "iso_8859-13", - .canon = "iso-8859-13", - .flags = 0, - }, - { - .input = "iso8859-13", - .canon = "iso-8859-13", - .flags = 0, - }, - { - .input = "8859-13", - .canon = "iso-8859-13", - .flags = 0, - }, - { - .input = "iso8859_13", - .canon = "iso-8859-13", - .flags = 0, - }, - { - 
.input = "iso_8859_13", - .canon = "iso-8859-13", - .flags = 0, - }, - { - .input = "iso-ir-199", - .canon = "iso-8859-14", - .flags = 0, - }, - { - .input = "iso_8859-14:1998", - .canon = "iso-8859-14", - .flags = 0, - }, - { - .input = "iso_8859-14", - .canon = "iso-8859-14", - .flags = 0, - }, - { - .input = "latin8", - .canon = "iso-8859-14", - .flags = 0, - }, - { - .input = "iso-celtic", - .canon = "iso-8859-14", - .flags = 0, - }, - { - .input = "l8", - .canon = "iso-8859-14", - .flags = 0, - }, - { - .input = "csisolatin9", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "csisolatin0", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "latin9", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "latin0", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "923", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "cp923", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "ibm-923", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "ibm923", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "iso8859-15", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "iso_8859-15", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "8859-15", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "iso_8859-15_fdis", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "l9", - .canon = "iso-8859-15", - .flags = 0, - }, - { - .input = "koi-8-r", - .canon = "koi8-r", - .flags = 0, - }, - { - .input = "cskoi8r", - .canon = "koi8-r", - .flags = 0, - }, - { - .input = "koi8", - .canon = "koi8-r", - .flags = 0, - }, - { - .input = "koi-8-u", - .canon = "koi8-u", - .flags = 0, - }, - { - .input = "koi-8-t", - .canon = "koi8-t", - .flags = 0, - }, - { - .input = "shiftjis", - .canon = "shift_jis", - .flags = 0, - }, - { - .input = "ms_kanji", - .canon = "shift_jis", - .flags = 0, - }, - { - .input = "csshiftjis", - .canon = "shift_jis", - .flags = 0, - }, - { - .input = 
"cp-437", - .canon = "ibm437", - .flags = 0, - }, - { - .input = "cp437", - .canon = "ibm437", - .flags = 0, - }, - { - .input = "437", - .canon = "ibm437", - .flags = 0, - }, - { - .input = "cspc8codepage437437", - .canon = "ibm437", - .flags = 0, - }, - { - .input = "cspc8codepage437", - .canon = "ibm437", - .flags = 0, - }, - { - .input = "ibm-437", - .canon = "ibm437", - .flags = 0, - }, - { - .input = "cp-850", - .canon = "ibm850", - .flags = 0, - }, - { - .input = "cp850", - .canon = "ibm850", - .flags = 0, - }, - { - .input = "850", - .canon = "ibm850", - .flags = 0, - }, - { - .input = "cspc850multilingual850", - .canon = "ibm850", - .flags = 0, - }, - { - .input = "cspc850multilingual", - .canon = "ibm850", - .flags = 0, - }, - { - .input = "ibm-850", - .canon = "ibm850", - .flags = 0, - }, - { - .input = "cp-851", - .canon = "ibm851", - .flags = 0, - }, - { - .input = "cp851", - .canon = "ibm851", - .flags = 0, - }, - { - .input = "851", - .canon = "ibm851", - .flags = 0, - }, - { - .input = "csibm851", - .canon = "ibm851", - .flags = 0, - }, - { - .input = "cp-852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "cp852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "cspcp852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "cspcp852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "ibm-852", - .canon = "ibm852", - .flags = 0, - }, - { - .input = "cp-855", - .canon = "ibm855", - .flags = 0, - }, - { - .input = "cp855", - .canon = "ibm855", - .flags = 0, - }, - { - .input = "855", - .canon = "ibm855", - .flags = 0, - }, - { - .input = "csibm855", - .canon = "ibm855", - .flags = 0, - }, - { - .input = "cspcp855", - .canon = "ibm855", - .flags = 0, - }, - { - .input = "ibm-855", - .canon = "ibm855", - .flags = 0, - }, - { - .input = "cp-857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = 
"cp857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = "857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = "csibm857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = "857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = "csibm857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = "ibm-857", - .canon = "ibm857", - .flags = 0, - }, - { - .input = "cp-860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "cp860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "csibm860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "csibm860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "ibm-860", - .canon = "ibm860", - .flags = 0, - }, - { - .input = "cp-861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "cp861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "cp-is", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "csibm861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "cp-is", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "csibm861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "ibm-861", - .canon = "ibm861", - .flags = 0, - }, - { - .input = "cp-862", - .canon = "ibm862", - .flags = 0, - }, - { - .input = "cp862", - .canon = "ibm862", - .flags = 0, - }, - { - .input = "862", - .canon = "ibm862", - .flags = 0, - }, - { - .input = "cspc862latinhebrew862", - .canon = "ibm862", - .flags = 0, - }, - { - .input = "cspc862latinhebrew", - .canon = "ibm862", - .flags = 0, - }, - { - .input = "ibm-862", - .canon = "ibm862", - .flags = 0, - }, - { - .input = "cp-863", - .canon = "ibm863", - .flags = 0, - }, - { - .input = "cp863", - .canon = "ibm863", - .flags = 0, - }, - { - .input = "863", - .canon = "ibm863", - 
.flags = 0, - }, - { - .input = "csibm863", - .canon = "ibm863", - .flags = 0, - }, - { - .input = "863", - .canon = "ibm863", - .flags = 0, - }, - { - .input = "csibm863", - .canon = "ibm863", - .flags = 0, - }, - { - .input = "ibm-863", - .canon = "ibm863", - .flags = 0, - }, - { - .input = "cp-864", - .canon = "ibm864", - .flags = 0, - }, - { - .input = "cp864", - .canon = "ibm864", - .flags = 0, - }, - { - .input = "csibm864", - .canon = "ibm864", - .flags = 0, - }, - { - .input = "csibm864", - .canon = "ibm864", - .flags = 0, - }, - { - .input = "ibm-864", - .canon = "ibm864", - .flags = 0, - }, - { - .input = "cp-865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "cp865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "csibm865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "csibm865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "ibm-865", - .canon = "ibm865", - .flags = 0, - }, - { - .input = "cp-866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "cp866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "csibm866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "csibm866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "ibm-866", - .canon = "ibm866", - .flags = 0, - }, - { - .input = "cp-868", - .canon = "ibm868", - .flags = 0, - }, - { - .input = "cp868", - .canon = "ibm868", - .flags = 0, - }, - { - .input = "cp-ar", - .canon = "ibm868", - .flags = 0, - }, - { - .input = "csibm868", - .canon = "ibm868", - .flags = 0, - }, - { - .input = "ibm-868", - .canon = "ibm868", - .flags = 0, - }, - { - .input = "cp-869", - .canon = "ibm869", - .flags = 0, - }, - { - .input = "cp869", - .canon = "ibm869", - .flags = 0, - }, - { - .input = "869", - .canon 
= "ibm869", - .flags = 0, - }, - { - .input = "cp-gr", - .canon = "ibm869", - .flags = 0, - }, - { - .input = "csibm869", - .canon = "ibm869", - .flags = 0, - }, - { - .input = "cp-891", - .canon = "ibm891", - .flags = 0, - }, - { - .input = "cp891", - .canon = "ibm891", - .flags = 0, - }, - { - .input = "csibm891", - .canon = "ibm891", - .flags = 0, - }, - { - .input = "cp-903", - .canon = "ibm903", - .flags = 0, - }, - { - .input = "cp903", - .canon = "ibm903", - .flags = 0, - }, - { - .input = "csibm903", - .canon = "ibm903", - .flags = 0, - }, - { - .input = "cp-904", - .canon = "ibm904", - .flags = 0, - }, - { - .input = "cp904", - .canon = "ibm904", - .flags = 0, - }, - { - .input = "904", - .canon = "ibm904", - .flags = 0, - }, - { - .input = "csibm904", - .canon = "ibm904", - .flags = 0, - }, - { - .input = "cp-1251", - .canon = "cp1251", - .flags = 0, - }, - { - .input = "windows-1251", - .canon = "cp1251", - .flags = 0, - }, - { - .input = "cp-1255", - .canon = "cp1255", - .flags = 0, - }, - { - .input = "windows-1255", - .canon = "cp1255", - .flags = 0, - }, - { - .input = "tis620.2533", - .canon = "tis-620", - .flags = 0, - }, + { + .input = "iso-646-us", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "ansi_x3.4-1968", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "iso-ir-6", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "iso_646.irv:1991", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "ascii", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "iso646-us", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "us", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "ibm367", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "cp367", + .canon = 
"ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "csascii", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "ascii7", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "default", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "646", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "iso_646.irv:1983", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "iso969-us", + .canon = "ansi_x3.4-1986", + .flags = RSPAMD_CHARSET_FLAG_ASCII, + }, + { + .input = "tw-big5", + .canon = "big5", + .flags = 0, + }, + { + .input = "csbig5", + .canon = "big5", + .flags = 0, + }, + { + .input = "hkscs-big5", + .canon = "big5-hkscs", + .flags = 0, + }, + { + .input = "big5hk", + .canon = "big5-hkscs", + .flags = 0, + }, + { + .input = "big5-hkscs:unicode", + .canon = "big5-hkscs", + .flags = 0, + }, + { + .input = "extended_unix_code_packed_format_for_japanese", + .canon = "euc-jp", + .flags = 0, + }, + { + .input = "cseucpkdfmtjapanese", + .canon = "euc-jp", + .flags = 0, + }, + { + .input = "x-eucjp", + .canon = "euc-jp", + .flags = 0, + }, + { + .input = "x-euc-jp", + .canon = "euc-jp", + .flags = 0, + }, + { + .input = "unicode-1-1-utf-8", + .canon = "utf-8", + .flags = RSPAMD_CHARSET_FLAG_UTF, + }, + { + .input = "cseuckr", + .canon = "euc-kr", + .flags = 0, + }, + { + .input = "5601", + .canon = "euc-kr", + .flags = 0, + }, + { + .input = "ksc-5601", + .canon = "euc-kr", + .flags = 0, + }, + { + .input = "ksc-5601-1987", + .canon = "euc-kr", + .flags = 0, + }, + { + .input = "ksc-5601_1987", + .canon = "euc-kr", + .flags = 0, + }, + { + .input = "ksc5601", + .canon = "euc-kr", + .flags = 0, + }, + { + .input = "cns11643", + .canon = "euc-tw", + .flags = 0, + }, + { + .input = "ibm-euctw", + .canon = "euc-tw", + .flags = 0, + }, + { + .input = "gb-18030", + .canon = 
"gb18030", + .flags = 0, + }, + { + .input = "ibm1392", + .canon = "gb18030", + .flags = 0, + }, + { + .input = "ibm-1392", + .canon = "gb18030", + .flags = 0, + }, + { + .input = "gb18030-2000", + .canon = "gb18030", + .flags = 0, + }, + { + .input = "gb-2312", + .canon = "gb2312", + .flags = 0, + }, + { + .input = "csgb2312", + .canon = "gb2312", + .flags = 0, + }, + { + .input = "euc_cn", + .canon = "gb2312", + .flags = 0, + }, + { + .input = "euccn", + .canon = "gb2312", + .flags = 0, + }, + { + .input = "euc-cn", + .canon = "gb2312", + .flags = 0, + }, + { + .input = "gb-k", + .canon = "gbk", + .flags = 0, + }, + { + .input = "iso_8859-1:1987", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "iso-ir-100", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "iso_8859-1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "latin1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "l1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "ibm819", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "cp819", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "csisolatin1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "819", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "cp819", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "iso8859-1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "8859-1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "iso8859_1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "iso_8859_1", + .canon = "iso-8859-1", + .flags = 0, + }, + { + .input = "iso_8859-2:1987", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "iso-ir-101", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "iso_8859-2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "latin2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "l2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = 
"csisolatin2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "912", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "cp912", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "ibm-912", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "ibm912", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "iso8859-2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "8859-2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "iso8859_2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "iso_8859_2", + .canon = "iso-8859-2", + .flags = 0, + }, + { + .input = "iso_8859-3:1988", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "iso-ir-109", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "iso_8859-3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "latin3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "l3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "csisolatin3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "913", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "cp913", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "ibm-913", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "ibm913", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "iso8859-3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "8859-3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "iso8859_3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "iso_8859_3", + .canon = "iso-8859-3", + .flags = 0, + }, + { + .input = "iso_8859-4:1988", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "iso-ir-110", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "iso_8859-4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "latin4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "l4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "csisolatin4", + 
.canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "914", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "cp914", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "ibm-914", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "ibm914", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "iso8859-4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "8859-4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "iso8859_4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "iso_8859_4", + .canon = "iso-8859-4", + .flags = 0, + }, + { + .input = "iso_8859-5:1988", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "iso-ir-144", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "iso_8859-5", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "cyrillic", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "csisolatincyrillic", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "915", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "cp915", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "ibm-915", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "ibm915", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "iso8859-5", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "8859-5", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "iso8859_5", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "iso_8859_5", + .canon = "iso-8859-5", + .flags = 0, + }, + { + .input = "iso_8859-6:1987", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "iso-ir-127", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "iso_8859-6", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "ecma-114", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "asmo-708", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "arabic", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = 
"csisolatinarabic", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "1089", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "cp1089", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "ibm-1089", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "ibm1089", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "iso8859-6", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "8859-6", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "iso8859_6", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "iso_8859_6", + .canon = "iso-8859-6", + .flags = 0, + }, + { + .input = "iso_8859-7:1987", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "iso-ir-126", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "iso_8859-7", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "elot_928", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "ecma-118", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "greek", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "greek8", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "csisolatingreek", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "813", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "cp813", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "ibm-813", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "ibm813", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "iso8859-7", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "8859-7", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "iso8859_7", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "iso_8859_7", + .canon = "iso-8859-7", + .flags = 0, + }, + { + .input = "iso_8859-8:1988", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "iso-ir-138", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "iso_8859-8", + .canon = "iso-8859-8", + .flags = 0, + }, + { + 
.input = "hebrew", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "csisolatinhebrew", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "916", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "cp916", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "ibm-916", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "ibm916", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "iso8859-8", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "8859-8", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "iso8859_8", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "iso_8859_8", + .canon = "iso-8859-8", + .flags = 0, + }, + { + .input = "iso_8859-9:1989", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "iso-ir-148", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "iso_8859-9", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "latin5", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "l5", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "csisolatin5", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "920", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "cp920", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "ibm-920", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "ibm920", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "iso8859-9", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "8859-9", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "iso8859_9", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "iso_8859_9", + .canon = "iso-8859-9", + .flags = 0, + }, + { + .input = "iso_8859-13", + .canon = "iso-8859-13", + .flags = 0, + }, + { + .input = "iso8859-13", + .canon = "iso-8859-13", + .flags = 0, + }, + { + .input = "8859-13", + .canon = "iso-8859-13", + .flags = 0, + }, + { + .input = "iso8859_13", + .canon = "iso-8859-13", + .flags = 0, + }, + { + .input 
= "iso_8859_13", + .canon = "iso-8859-13", + .flags = 0, + }, + { + .input = "iso-ir-199", + .canon = "iso-8859-14", + .flags = 0, + }, + { + .input = "iso_8859-14:1998", + .canon = "iso-8859-14", + .flags = 0, + }, + { + .input = "iso_8859-14", + .canon = "iso-8859-14", + .flags = 0, + }, + { + .input = "latin8", + .canon = "iso-8859-14", + .flags = 0, + }, + { + .input = "iso-celtic", + .canon = "iso-8859-14", + .flags = 0, + }, + { + .input = "l8", + .canon = "iso-8859-14", + .flags = 0, + }, + { + .input = "csisolatin9", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "csisolatin0", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "latin9", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "latin0", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "923", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "cp923", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "ibm-923", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "ibm923", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "iso8859-15", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "iso_8859-15", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "8859-15", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "iso_8859-15_fdis", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "l9", + .canon = "iso-8859-15", + .flags = 0, + }, + { + .input = "koi-8-r", + .canon = "koi8-r", + .flags = 0, + }, + { + .input = "cskoi8r", + .canon = "koi8-r", + .flags = 0, + }, + { + .input = "koi8", + .canon = "koi8-r", + .flags = 0, + }, + { + .input = "koi-8-u", + .canon = "koi8-u", + .flags = 0, + }, + { + .input = "koi-8-t", + .canon = "koi8-t", + .flags = 0, + }, + { + .input = "shiftjis", + .canon = "shift_jis", + .flags = 0, + }, + { + .input = "ms_kanji", + .canon = "shift_jis", + .flags = 0, + }, + { + .input = "csshiftjis", + .canon = "shift_jis", + .flags = 0, + }, + { + .input = "cp-437", + 
.canon = "ibm437", + .flags = 0, + }, + { + .input = "cp437", + .canon = "ibm437", + .flags = 0, + }, + { + .input = "437", + .canon = "ibm437", + .flags = 0, + }, + { + .input = "cspc8codepage437437", + .canon = "ibm437", + .flags = 0, + }, + { + .input = "cspc8codepage437", + .canon = "ibm437", + .flags = 0, + }, + { + .input = "ibm-437", + .canon = "ibm437", + .flags = 0, + }, + { + .input = "cp-850", + .canon = "ibm850", + .flags = 0, + }, + { + .input = "cp850", + .canon = "ibm850", + .flags = 0, + }, + { + .input = "850", + .canon = "ibm850", + .flags = 0, + }, + { + .input = "cspc850multilingual850", + .canon = "ibm850", + .flags = 0, + }, + { + .input = "cspc850multilingual", + .canon = "ibm850", + .flags = 0, + }, + { + .input = "ibm-850", + .canon = "ibm850", + .flags = 0, + }, + { + .input = "cp-851", + .canon = "ibm851", + .flags = 0, + }, + { + .input = "cp851", + .canon = "ibm851", + .flags = 0, + }, + { + .input = "851", + .canon = "ibm851", + .flags = 0, + }, + { + .input = "csibm851", + .canon = "ibm851", + .flags = 0, + }, + { + .input = "cp-852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "cp852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "cspcp852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "cspcp852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "ibm-852", + .canon = "ibm852", + .flags = 0, + }, + { + .input = "cp-855", + .canon = "ibm855", + .flags = 0, + }, + { + .input = "cp855", + .canon = "ibm855", + .flags = 0, + }, + { + .input = "855", + .canon = "ibm855", + .flags = 0, + }, + { + .input = "csibm855", + .canon = "ibm855", + .flags = 0, + }, + { + .input = "cspcp855", + .canon = "ibm855", + .flags = 0, + }, + { + .input = "ibm-855", + .canon = "ibm855", + .flags = 0, + }, + { + .input = "cp-857", + .canon = "ibm857", + .flags = 0, + }, + { + .input = "cp857", + .canon 
= "ibm857", + .flags = 0, + }, + { + .input = "857", + .canon = "ibm857", + .flags = 0, + }, + { + .input = "csibm857", + .canon = "ibm857", + .flags = 0, + }, + { + .input = "857", + .canon = "ibm857", + .flags = 0, + }, + { + .input = "csibm857", + .canon = "ibm857", + .flags = 0, + }, + { + .input = "ibm-857", + .canon = "ibm857", + .flags = 0, + }, + { + .input = "cp-860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "cp860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "csibm860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "csibm860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "ibm-860", + .canon = "ibm860", + .flags = 0, + }, + { + .input = "cp-861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "cp861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "cp-is", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "csibm861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "cp-is", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "csibm861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "ibm-861", + .canon = "ibm861", + .flags = 0, + }, + { + .input = "cp-862", + .canon = "ibm862", + .flags = 0, + }, + { + .input = "cp862", + .canon = "ibm862", + .flags = 0, + }, + { + .input = "862", + .canon = "ibm862", + .flags = 0, + }, + { + .input = "cspc862latinhebrew862", + .canon = "ibm862", + .flags = 0, + }, + { + .input = "cspc862latinhebrew", + .canon = "ibm862", + .flags = 0, + }, + { + .input = "ibm-862", + .canon = "ibm862", + .flags = 0, + }, + { + .input = "cp-863", + .canon = "ibm863", + .flags = 0, + }, + { + .input = "cp863", + .canon = "ibm863", + .flags = 0, + }, + { + .input = "863", + .canon = "ibm863", + .flags = 0, + }, + { + 
.input = "csibm863", + .canon = "ibm863", + .flags = 0, + }, + { + .input = "863", + .canon = "ibm863", + .flags = 0, + }, + { + .input = "csibm863", + .canon = "ibm863", + .flags = 0, + }, + { + .input = "ibm-863", + .canon = "ibm863", + .flags = 0, + }, + { + .input = "cp-864", + .canon = "ibm864", + .flags = 0, + }, + { + .input = "cp864", + .canon = "ibm864", + .flags = 0, + }, + { + .input = "csibm864", + .canon = "ibm864", + .flags = 0, + }, + { + .input = "csibm864", + .canon = "ibm864", + .flags = 0, + }, + { + .input = "ibm-864", + .canon = "ibm864", + .flags = 0, + }, + { + .input = "cp-865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "cp865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "csibm865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "csibm865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "ibm-865", + .canon = "ibm865", + .flags = 0, + }, + { + .input = "cp-866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "cp866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "csibm866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "csibm866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "ibm-866", + .canon = "ibm866", + .flags = 0, + }, + { + .input = "cp-868", + .canon = "ibm868", + .flags = 0, + }, + { + .input = "cp868", + .canon = "ibm868", + .flags = 0, + }, + { + .input = "cp-ar", + .canon = "ibm868", + .flags = 0, + }, + { + .input = "csibm868", + .canon = "ibm868", + .flags = 0, + }, + { + .input = "ibm-868", + .canon = "ibm868", + .flags = 0, + }, + { + .input = "cp-869", + .canon = "ibm869", + .flags = 0, + }, + { + .input = "cp869", + .canon = "ibm869", + .flags = 0, + }, + { + .input = "869", + .canon = "ibm869", + .flags = 
0, + }, + { + .input = "cp-gr", + .canon = "ibm869", + .flags = 0, + }, + { + .input = "csibm869", + .canon = "ibm869", + .flags = 0, + }, + { + .input = "cp-891", + .canon = "ibm891", + .flags = 0, + }, + { + .input = "cp891", + .canon = "ibm891", + .flags = 0, + }, + { + .input = "csibm891", + .canon = "ibm891", + .flags = 0, + }, + { + .input = "cp-903", + .canon = "ibm903", + .flags = 0, + }, + { + .input = "cp903", + .canon = "ibm903", + .flags = 0, + }, + { + .input = "csibm903", + .canon = "ibm903", + .flags = 0, + }, + { + .input = "cp-904", + .canon = "ibm904", + .flags = 0, + }, + { + .input = "cp904", + .canon = "ibm904", + .flags = 0, + }, + { + .input = "904", + .canon = "ibm904", + .flags = 0, + }, + { + .input = "csibm904", + .canon = "ibm904", + .flags = 0, + }, + { + .input = "cp-1251", + .canon = "cp1251", + .flags = 0, + }, + { + .input = "windows-1251", + .canon = "cp1251", + .flags = 0, + }, + { + .input = "cp-1255", + .canon = "cp1255", + .flags = 0, + }, + { + .input = "windows-1255", + .canon = "cp1255", + .flags = 0, + }, + { + .input = "tis620.2533", + .canon = "tis-620", + .flags = 0, + }, }; #endif /* SRC_LIBMIME_MIME_ENCODING_LIST_H_ */ diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index d3d40c870..88e9e16a1 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -24,98 +24,98 @@ #include "lua/lua_common.h" #include "utlist.h" -gboolean rspamd_compare_encoding (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_header_exists (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_parts_distance (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_recipients_distance (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_has_only_html_part (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, - GArray * args, - 
void *unused); -gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_is_html_balanced (struct rspamd_task *task, - GArray * args, - void *unused); -gboolean rspamd_has_html_tag (struct rspamd_task *task, - GArray * args, +gboolean rspamd_compare_encoding(struct rspamd_task *task, + GArray *args, + void *unused); +gboolean rspamd_header_exists(struct rspamd_task *task, + GArray *args, void *unused); -gboolean rspamd_has_fake_html (struct rspamd_task *task, - GArray * args, +gboolean rspamd_parts_distance(struct rspamd_task *task, + GArray *args, void *unused); -static gboolean rspamd_raw_header_exists (struct rspamd_task *task, - GArray * args, +gboolean rspamd_recipients_distance(struct rspamd_task *task, + GArray *args, + void *unused); +gboolean rspamd_has_only_html_part(struct rspamd_task *task, + GArray *args, + void *unused); +gboolean rspamd_is_recipients_sorted(struct rspamd_task *task, + GArray *args, + void *unused); +gboolean rspamd_compare_transfer_encoding(struct rspamd_task *task, + GArray *args, void *unused); -static gboolean rspamd_check_smtp_data (struct rspamd_task *task, - GArray * args, - void *unused); -static gboolean rspamd_content_type_is_type (struct rspamd_task * task, - GArray * args, - void *unused); -static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task, - GArray * args, - void *unused); -static gboolean rspamd_content_type_has_param (struct rspamd_task * task, - GArray * args, - void *unused); -static gboolean rspamd_content_type_compare_param (struct rspamd_task * task, - GArray * args, - void *unused); -static gboolean rspamd_has_content_part (struct rspamd_task *task, - GArray * args, +gboolean rspamd_is_html_balanced(struct rspamd_task *task, + GArray *args, + void *unused); +gboolean rspamd_has_html_tag(struct rspamd_task *task, + GArray *args, + void *unused); +gboolean rspamd_has_fake_html(struct rspamd_task *task, + GArray *args, + void 
*unused); +static gboolean rspamd_raw_header_exists(struct rspamd_task *task, + GArray *args, void *unused); -static gboolean rspamd_has_content_part_len (struct rspamd_task *task, - GArray * args, - void *unused); -static gboolean rspamd_is_empty_body (struct rspamd_task *task, - GArray * args, - void *unused); -static gboolean rspamd_has_flag_expr (struct rspamd_task *task, - GArray * args, - void *unused); -static gboolean rspamd_has_symbol_expr (struct rspamd_task *task, - GArray * args, - void *unused); - -static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len, - rspamd_mempool_t *pool, gpointer ud, GError **err); -static gdouble rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom); -static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom); -static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom); +static gboolean rspamd_check_smtp_data(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_content_type_is_type(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_content_type_is_subtype(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_content_type_has_param(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_content_type_compare_param(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_has_content_part(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_has_content_part_len(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_is_empty_body(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_has_flag_expr(struct rspamd_task *task, + GArray *args, + void *unused); +static gboolean rspamd_has_symbol_expr(struct rspamd_task *task, + GArray *args, + void *unused); + +static rspamd_expression_atom_t *rspamd_mime_expr_parse(const gchar *line, gsize 
len, + rspamd_mempool_t *pool, gpointer ud, GError **err); +static gdouble rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom); +static gint rspamd_mime_expr_priority(rspamd_expression_atom_t *atom); +static void rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom); /** * Regexp structure */ struct rspamd_regexp_atom { - enum rspamd_re_type type; /**< regexp type */ - gchar *regexp_text; /**< regexp text representation */ - rspamd_regexp_t *regexp; /**< regexp structure */ + enum rspamd_re_type type; /**< regexp type */ + gchar *regexp_text; /**< regexp text representation */ + rspamd_regexp_t *regexp; /**< regexp structure */ union { - const gchar *header; /**< header name for header regexps */ - const gchar *selector; /**< selector name for lua selector regexp */ + const gchar *header; /**< header name for header regexps */ + const gchar *selector; /**< selector name for lua selector regexp */ } extra; - gboolean is_test; /**< true if this expression must be tested */ - gboolean is_strong; /**< true if headers search must be case sensitive */ - gboolean is_multiple; /**< true if we need to match all inclusions of atom */ + gboolean is_test; /**< true if this expression must be tested */ + gboolean is_strong; /**< true if headers search must be case sensitive */ + gboolean is_multiple; /**< true if we need to match all inclusions of atom */ }; /** * Rspamd expression function */ struct rspamd_function_atom { - gchar *name; /**< name of function */ - GArray *args; /**< its args */ + gchar *name; /**< name of function */ + GArray *args; /**< its args */ }; enum rspamd_mime_atom_type { @@ -145,128 +145,127 @@ static struct _fl { rspamd_internal_func_t func; void *user_data; } rspamd_functions_list[] = { - {"check_smtp_data", rspamd_check_smtp_data, NULL}, - {"compare_encoding", rspamd_compare_encoding, NULL}, - {"compare_parts_distance", rspamd_parts_distance, NULL}, - {"compare_recipients_distance", rspamd_recipients_distance, NULL}, - 
{"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, - {"content_type_compare_param", rspamd_content_type_compare_param, NULL}, - {"content_type_has_param", rspamd_content_type_has_param, NULL}, - {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL}, - {"content_type_is_type", rspamd_content_type_is_type, NULL}, - {"has_content_part", rspamd_has_content_part, NULL}, - {"has_content_part_len", rspamd_has_content_part_len, NULL}, - {"has_fake_html", rspamd_has_fake_html, NULL}, - {"has_flag", rspamd_has_flag_expr, NULL}, - {"has_html_tag", rspamd_has_html_tag, NULL}, - {"has_only_html_part", rspamd_has_only_html_part, NULL}, - {"has_symbol", rspamd_has_symbol_expr, NULL}, - {"header_exists", rspamd_header_exists, NULL}, - {"is_empty_body", rspamd_is_empty_body, NULL}, - {"is_html_balanced", rspamd_is_html_balanced, NULL}, - {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}, - {"raw_header_exists", rspamd_raw_header_exists, NULL}, + {"check_smtp_data", rspamd_check_smtp_data, NULL}, + {"compare_encoding", rspamd_compare_encoding, NULL}, + {"compare_parts_distance", rspamd_parts_distance, NULL}, + {"compare_recipients_distance", rspamd_recipients_distance, NULL}, + {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, + {"content_type_compare_param", rspamd_content_type_compare_param, NULL}, + {"content_type_has_param", rspamd_content_type_has_param, NULL}, + {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL}, + {"content_type_is_type", rspamd_content_type_is_type, NULL}, + {"has_content_part", rspamd_has_content_part, NULL}, + {"has_content_part_len", rspamd_has_content_part_len, NULL}, + {"has_fake_html", rspamd_has_fake_html, NULL}, + {"has_flag", rspamd_has_flag_expr, NULL}, + {"has_html_tag", rspamd_has_html_tag, NULL}, + {"has_only_html_part", rspamd_has_only_html_part, NULL}, + {"has_symbol", rspamd_has_symbol_expr, NULL}, + {"header_exists", rspamd_header_exists, NULL}, + {"is_empty_body", 
rspamd_is_empty_body, NULL}, + {"is_html_balanced", rspamd_is_html_balanced, NULL}, + {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL}, + {"raw_header_exists", rspamd_raw_header_exists, NULL}, }; const struct rspamd_atom_subr mime_expr_subr = { .parse = rspamd_mime_expr_parse, .process = rspamd_mime_expr_process, .priority = rspamd_mime_expr_priority, - .destroy = rspamd_mime_expr_destroy -}; + .destroy = rspamd_mime_expr_destroy}; static struct _fl *list_ptr = &rspamd_functions_list[0]; -static guint32 functions_number = sizeof (rspamd_functions_list) / - sizeof (struct _fl); +static guint32 functions_number = sizeof(rspamd_functions_list) / + sizeof(struct _fl); static gboolean list_allocated = FALSE; /* Bsearch routine */ static gint -fl_cmp (const void *s1, const void *s2) +fl_cmp(const void *s1, const void *s2) { - struct _fl *fl1 = (struct _fl *)s1; - struct _fl *fl2 = (struct _fl *)s2; - return strcmp (fl1->name, fl2->name); + struct _fl *fl1 = (struct _fl *) s1; + struct _fl *fl2 = (struct _fl *) s2; + return strcmp(fl1->name, fl2->name); } static GQuark -rspamd_mime_expr_quark (void) +rspamd_mime_expr_quark(void) { - return g_quark_from_static_string ("mime-expressions"); + return g_quark_from_static_string("mime-expressions"); } #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0) static gboolean -rspamd_parse_long_option (const gchar *start, gsize len, - struct rspamd_regexp_atom *a) +rspamd_parse_long_option(const gchar *start, gsize len, + struct rspamd_regexp_atom *a) { gboolean ret = FALSE; - if (TYPE_CHECK (start, "body", len)) { + if (TYPE_CHECK(start, "body", len)) { ret = TRUE; a->type = RSPAMD_RE_BODY; } - else if (TYPE_CHECK (start, "part", len) || - TYPE_CHECK (start, "mime", len)) { + else if (TYPE_CHECK(start, "part", len) || + TYPE_CHECK(start, "mime", len)) { ret = TRUE; a->type = RSPAMD_RE_MIME; } - else if (TYPE_CHECK (start, "raw_part", len) || - TYPE_CHECK (start, 
"raw_mime", len) || - TYPE_CHECK (start, "mime_raw", len)) { + else if (TYPE_CHECK(start, "raw_part", len) || + TYPE_CHECK(start, "raw_mime", len) || + TYPE_CHECK(start, "mime_raw", len)) { ret = TRUE; a->type = RSPAMD_RE_RAWMIME; } - else if (TYPE_CHECK (start, "header", len)) { + else if (TYPE_CHECK(start, "header", len)) { ret = TRUE; a->type = RSPAMD_RE_HEADER; } - else if (TYPE_CHECK (start, "mime_header", len) || - TYPE_CHECK (start, "header_mime", len)) { + else if (TYPE_CHECK(start, "mime_header", len) || + TYPE_CHECK(start, "header_mime", len)) { ret = TRUE; a->type = RSPAMD_RE_MIMEHEADER; } - else if (TYPE_CHECK (start, "raw_header", len) || - TYPE_CHECK (start, "header_raw", len)) { + else if (TYPE_CHECK(start, "raw_header", len) || + TYPE_CHECK(start, "header_raw", len)) { ret = TRUE; a->type = RSPAMD_RE_RAWHEADER; } - else if (TYPE_CHECK (start, "all_header", len) || - TYPE_CHECK (start, "header_all", len) || - TYPE_CHECK (start, "all_headers", len)) { + else if (TYPE_CHECK(start, "all_header", len) || + TYPE_CHECK(start, "header_all", len) || + TYPE_CHECK(start, "all_headers", len)) { ret = TRUE; a->type = RSPAMD_RE_ALLHEADER; } - else if (TYPE_CHECK (start, "url", len)) { + else if (TYPE_CHECK(start, "url", len)) { ret = TRUE; a->type = RSPAMD_RE_URL; } - else if (TYPE_CHECK (start, "email", len)) { + else if (TYPE_CHECK(start, "email", len)) { ret = TRUE; a->type = RSPAMD_RE_EMAIL; } - else if (TYPE_CHECK (start, "sa_body", len)) { + else if (TYPE_CHECK(start, "sa_body", len)) { ret = TRUE; a->type = RSPAMD_RE_SABODY; } - else if (TYPE_CHECK (start, "sa_raw_body", len) || - TYPE_CHECK (start, "sa_body_raw", len)) { + else if (TYPE_CHECK(start, "sa_raw_body", len) || + TYPE_CHECK(start, "sa_body_raw", len)) { ret = TRUE; a->type = RSPAMD_RE_SARAWBODY; } - else if (TYPE_CHECK (start, "words", len)) { + else if (TYPE_CHECK(start, "words", len)) { ret = TRUE; a->type = RSPAMD_RE_WORDS; } - else if (TYPE_CHECK (start, "raw_words", len)) { + else if 
(TYPE_CHECK(start, "raw_words", len)) { ret = TRUE; a->type = RSPAMD_RE_RAWWORDS; } - else if (TYPE_CHECK (start, "stem_words", len)) { + else if (TYPE_CHECK(start, "stem_words", len)) { ret = TRUE; a->type = RSPAMD_RE_STEMWORDS; } - else if (TYPE_CHECK (start, "selector", len)) { + else if (TYPE_CHECK(start, "selector", len)) { ret = TRUE; a->type = RSPAMD_RE_SELECTOR; } @@ -278,8 +277,8 @@ rspamd_parse_long_option (const gchar *start, gsize len, * Rspamd regexp utility functions */ static struct rspamd_regexp_atom * -rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, - struct rspamd_config *cfg) +rspamd_mime_expr_parse_regexp_atom(rspamd_mempool_t *pool, const gchar *line, + struct rspamd_config *cfg) { const gchar *begin, *end, *p, *src, *start, *brace; gchar *dbegin, *dend, *extra = NULL; @@ -288,18 +287,18 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, GString *re_flags; if (line == NULL) { - msg_err_pool ("cannot parse NULL line"); + msg_err_pool("cannot parse NULL line"); return NULL; } src = line; - result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom)); + result = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_regexp_atom)); /* Skip whitespaces */ - while (g_ascii_isspace (*line)) { + while (g_ascii_isspace(*line)) { line++; } if (*line == '\0') { - msg_warn_pool ("got empty regexp"); + msg_warn_pool("got empty regexp"); return NULL; } @@ -307,7 +306,7 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, start = line; /* First try to find header name */ - begin = strchr (line, '/'); + begin = strchr(line, '/'); if (begin != NULL) { p = begin; end = NULL; @@ -320,13 +319,13 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, } if (end) { - extra = rspamd_mempool_alloc (pool, end - line + 1); - rspamd_strlcpy (extra, line, end - line + 1); + extra = rspamd_mempool_alloc(pool, end - line + 1); + 
rspamd_strlcpy(extra, line, end - line + 1); line = end; } } else { - extra = rspamd_mempool_strdup (pool, line); + extra = rspamd_mempool_strdup(pool, line); result->type = RSPAMD_RE_MAX; line = start; } @@ -339,13 +338,13 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, } else if (extra == NULL) { /* Assume that line without // is just a header name */ - extra = rspamd_mempool_strdup (pool, line); + extra = rspamd_mempool_strdup(pool, line); result->type = RSPAMD_RE_HEADER; return result; } else { /* We got header name earlier but have not found // expression, so it is invalid regexp */ - msg_warn_pool ( + msg_warn_pool( "got no header name (eg. header=) but without corresponding regexp, %s", src); return NULL; @@ -356,12 +355,12 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, end++; } if (end == begin || *end != '/') { - msg_warn_pool ("no trailing / in regexp %s", src); + msg_warn_pool("no trailing / in regexp %s", src); return NULL; } /* Parse flags */ p = end + 1; - re_flags = g_string_sized_new (32); + re_flags = g_string_sized_new(32); while (p != NULL) { switch (*p) { @@ -374,7 +373,7 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, case 'r': case 'L': /* Handled by rspamd_regexp_t */ - g_string_append_c (re_flags, *p); + g_string_append_c(re_flags, *p); p++; break; case 'o': @@ -427,10 +426,10 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, break; case '{': /* Long definition */ - if ((brace = strchr (p + 1, '}')) != NULL) { - if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) { - msg_warn_pool ("invalid long regexp type: %*s in '%s'", - (int)(brace - (p + 1)), p + 1, src); + if ((brace = strchr(p + 1, '}')) != NULL) { + if (!rspamd_parse_long_option(p + 1, brace - (p + 1), result)) { + msg_warn_pool("invalid long regexp type: %*s in '%s'", + (int) (brace - (p + 1)), p + 1, src); p = NULL; } else { @@ 
-468,16 +467,16 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, result->type = RSPAMD_RE_HEADER; } else { - msg_err_pool ("could not read regexp: %s, unknown type", src); + msg_err_pool("could not read regexp: %s, unknown type", src); return NULL; } } if ((result->type == RSPAMD_RE_HEADER || - result->type == RSPAMD_RE_RAWHEADER || - result->type == RSPAMD_RE_MIMEHEADER)) { + result->type == RSPAMD_RE_RAWHEADER || + result->type == RSPAMD_RE_MIMEHEADER)) { if (extra == NULL) { - msg_err_pool ("header regexp: '%s' has no header part", src); + msg_err_pool("header regexp: '%s' has no header part", src); return NULL; } else { @@ -487,7 +486,7 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, if (result->type == RSPAMD_RE_SELECTOR) { if (extra == NULL) { - msg_err_pool ("selector regexp: '%s' has no selector part", src); + msg_err_pool("selector regexp: '%s' has no selector part", src); return NULL; } else { @@ -496,35 +495,35 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, } - result->regexp_text = rspamd_mempool_strdup (pool, start); + result->regexp_text = rspamd_mempool_strdup(pool, start); dbegin = result->regexp_text + (begin - start); dend = result->regexp_text + (end - start); *dend = '\0'; - result->regexp = rspamd_regexp_new (dbegin, re_flags->str, - &err); + result->regexp = rspamd_regexp_new(dbegin, re_flags->str, + &err); - g_string_free (re_flags, TRUE); + g_string_free(re_flags, TRUE); if (result->regexp == NULL || err != NULL) { - msg_warn_pool ("could not read regexp: %s while reading regexp %e", - src, err); + msg_warn_pool("could not read regexp: %s while reading regexp %e", + src, err); if (err) { - g_error_free (err); + g_error_free(err); } return NULL; } if (result->is_multiple) { - rspamd_regexp_set_maxhits (result->regexp, 0); + rspamd_regexp_set_maxhits(result->regexp, 0); } else { - rspamd_regexp_set_maxhits (result->regexp, 1); + 
rspamd_regexp_set_maxhits(result->regexp, 1); } - rspamd_regexp_set_ud (result->regexp, result); + rspamd_regexp_set_ud(result->regexp, result); *dend = '/'; @@ -532,7 +531,7 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, } struct rspamd_function_atom * -rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input) +rspamd_mime_expr_parse_function_atom(rspamd_mempool_t *pool, const gchar *input) { const gchar *obrace, *ebrace, *p, *c; gchar t, *databuf; @@ -546,17 +545,18 @@ rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input in_regexp, got_backslash, got_comma - } state, prev_state = 0; + } state, + prev_state = 0; - obrace = strchr (input, '('); - ebrace = strrchr (input, ')'); + obrace = strchr(input, '('); + ebrace = strrchr(input, ')'); - g_assert (obrace != NULL && ebrace != NULL); + g_assert(obrace != NULL && ebrace != NULL); - res = rspamd_mempool_alloc0 (pool, sizeof (*res)); - res->name = rspamd_mempool_alloc (pool, obrace - input + 1); - rspamd_strlcpy (res->name, input, obrace - input + 1); - res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument)); + res = rspamd_mempool_alloc0(pool, sizeof(*res)); + res->name = rspamd_mempool_alloc(pool, obrace - input + 1); + rspamd_strlcpy(res->name, input, obrace - input + 1); + res->args = g_array_new(FALSE, FALSE, sizeof(struct expression_argument)); p = obrace + 1; c = p; @@ -571,7 +571,7 @@ rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input state = in_regexp; c = p; } - else if (!g_ascii_isspace (t)) { + else if (!g_ascii_isspace(t)) { state = in_string; if (t == '\'' || t == '\"') { @@ -581,7 +581,7 @@ rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input c = p; } } - p ++; + p++; break; case in_regexp: if (t == '\\') { @@ -590,24 +590,24 @@ rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input } else if (t == ',' || p == 
ebrace) { len = p - c + 1; - databuf = rspamd_mempool_alloc (pool, len); - rspamd_strlcpy (databuf, c, len); + databuf = rspamd_mempool_alloc(pool, len); + rspamd_strlcpy(databuf, c, len); arg.type = EXPRESSION_ARGUMENT_REGEXP; - arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err); + arg.data = rspamd_regexp_cache_create(NULL, databuf, NULL, &err); if (arg.data == NULL) { /* Fallback to string */ - msg_warn ("cannot parse slashed argument %s as regexp: %s", - databuf, err->message); - g_error_free (err); + msg_warn("cannot parse slashed argument %s as regexp: %s", + databuf, err->message); + g_error_free(err); arg.type = EXPRESSION_ARGUMENT_NORMAL; arg.data = databuf; } - g_array_append_val (res->args, arg); + g_array_append_val(res->args, arg); state = got_comma; } - p ++; + p++; break; case in_string: if (t == '\\') { @@ -622,18 +622,18 @@ rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input len = p - c + 1; } - databuf = rspamd_mempool_alloc (pool, len); - rspamd_strlcpy (databuf, c, len); + databuf = rspamd_mempool_alloc(pool, len); + rspamd_strlcpy(databuf, c, len); arg.type = EXPRESSION_ARGUMENT_NORMAL; arg.data = databuf; - g_array_append_val (res->args, arg); + g_array_append_val(res->args, arg); state = got_comma; } - p ++; + p++; break; case got_backslash: state = prev_state; - p ++; + p++; break; case got_comma: state = start_read_argument; @@ -645,13 +645,13 @@ rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input } static rspamd_expression_atom_t * -rspamd_mime_expr_parse (const gchar *line, gsize len, - rspamd_mempool_t *pool, gpointer ud, GError **err) +rspamd_mime_expr_parse(const gchar *line, gsize len, + rspamd_mempool_t *pool, gpointer ud, GError **err) { rspamd_expression_atom_t *a = NULL; struct rspamd_mime_atom *mime_atom = NULL; const gchar *p, *end, *c = NULL; - struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud; + struct rspamd_mime_expr_ud *real_ud = 
(struct rspamd_mime_expr_ud *) ud; struct rspamd_config *cfg; rspamd_regexp_t *own_re; gchar t; @@ -670,7 +670,8 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, got_ebrace, end_atom, bad_atom - } state = 0, prev_state = 0; + } state = 0, + prev_state = 0; p = line; end = p + len; @@ -689,9 +690,9 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, /* Function */ state = got_obrace; } - else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') { + else if (!g_ascii_isalnum(t) && t != '_' && t != '-' && t != '=') { if (t == ':') { - if (p - line == 3 && memcmp (line, "lua", 3) == 0) { + if (p - line == 3 && memcmp(line, "lua", 3) == 0) { type = MIME_ATOM_LOCAL_LUA_FUNCTION; state = in_local_function; c = p + 1; @@ -705,10 +706,10 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, continue; } } - else if (g_ascii_isspace (t)) { + else if (g_ascii_isspace(t)) { state = bad_atom; } - p ++; + p++; break; case got_slash: state = in_regexp; @@ -721,7 +722,7 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, else if (t == '/') { state = got_second_slash; } - p ++; + p++; break; case got_second_slash: state = in_flags; @@ -729,29 +730,29 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, case in_flags: if (t == '{') { state = in_flags_brace; - p ++; + p++; } - else if (!g_ascii_isalpha (t) && t != '$') { + else if (!g_ascii_isalpha(t) && t != '$') { state = end_atom; } else { - p ++; + p++; } break; case in_flags_brace: if (t == '}') { state = in_flags; } - p ++; + p++; break; case got_backslash: state = prev_state; - p ++; + p++; break; case got_obrace: state = in_function; type = MIME_ATOM_INTERNAL_FUNCTION; - obraces ++; + obraces++; break; case in_function: if (t == '\\') { @@ -759,19 +760,19 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, prev_state = in_function; } else if (t == '(') { - obraces ++; + obraces++; } else if (t == ')') { - ebraces ++; + ebraces++; if (ebraces == obraces) { state = got_ebrace; } } - p ++; + 
p++; break; case in_local_function: - if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) { - g_assert (c != NULL); + if (!(g_ascii_isalnum(t) || t == '-' || t == '_')) { + g_assert(c != NULL); state = end_atom; } else { @@ -782,10 +783,11 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, state = end_atom; break; case bad_atom: - g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse" - " mime atom '%s' when reading symbol '%c' at offset %d, " - "near %.*s", line, t, (gint)(p - line), - (gint)MIN (end - p, 10), p); + g_set_error(err, rspamd_mime_expr_quark(), 100, "cannot parse" + " mime atom '%s' when reading symbol '%c' at offset %d, " + "near %.*s", + line, t, (gint) (p - line), + (gint) MIN(end - p, 10), p); return NULL; case end_atom: goto set; @@ -794,24 +796,24 @@ rspamd_mime_expr_parse (const gchar *line, gsize len, set: if (p - line == 0 || (state != got_ebrace && state != got_second_slash && - state != in_flags && state != end_atom)) { - g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty" - " mime atom"); + state != in_flags && state != end_atom)) { + g_set_error(err, rspamd_mime_expr_quark(), 200, "incomplete or empty" + " mime atom"); return NULL; } - mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom)); + mime_atom = rspamd_mempool_alloc(pool, sizeof(*mime_atom)); mime_atom->type = type; - mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1); - rspamd_strlcpy (mime_atom->str, line, p - line + 1); + mime_atom->str = rspamd_mempool_alloc(pool, p - line + 1); + rspamd_strlcpy(mime_atom->str, line, p - line + 1); if (type == MIME_ATOM_REGEXP) { - mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool, - mime_atom->str, cfg); + mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom(pool, + mime_atom->str, cfg); if (mime_atom->d.re == NULL) { - g_set_error (err, rspamd_mime_expr_quark(), 200, - "cannot parse regexp '%s'", - mime_atom->str); + g_set_error(err, rspamd_mime_expr_quark(), 200, + "cannot parse 
regexp '%s'", + mime_atom->str); goto err; } else { @@ -819,27 +821,27 @@ set: /* Check regexp condition */ if (real_ud->conf_obj != NULL) { - const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj, - "re_conditions"); + const ucl_object_t *re_conditions = ucl_object_lookup(real_ud->conf_obj, + "re_conditions"); if (re_conditions != NULL) { - if (ucl_object_type (re_conditions) != UCL_OBJECT) { - g_set_error (err, rspamd_mime_expr_quark (), 320, - "re_conditions is not a table for '%s'", - mime_atom->str); - rspamd_regexp_unref (mime_atom->d.re->regexp); + if (ucl_object_type(re_conditions) != UCL_OBJECT) { + g_set_error(err, rspamd_mime_expr_quark(), 320, + "re_conditions is not a table for '%s'", + mime_atom->str); + rspamd_regexp_unref(mime_atom->d.re->regexp); goto err; } - const ucl_object_t *function_obj = ucl_object_lookup (re_conditions, - mime_atom->str); + const ucl_object_t *function_obj = ucl_object_lookup(re_conditions, + mime_atom->str); if (function_obj != NULL) { - if (ucl_object_type (function_obj) != UCL_USERDATA) { - g_set_error (err, rspamd_mime_expr_quark (), 320, - "condition for '%s' is invalid, must be function", - mime_atom->str); - rspamd_regexp_unref (mime_atom->d.re->regexp); + if (ucl_object_type(function_obj) != UCL_USERDATA) { + g_set_error(err, rspamd_mime_expr_quark(), 320, + "condition for '%s' is invalid, must be function", + mime_atom->str); + rspamd_regexp_unref(mime_atom->d.re->regexp); goto err; } @@ -851,132 +853,131 @@ set: } if (lua_cbref != -1) { - msg_info_config ("added condition for regexp %s", mime_atom->str); + msg_info_config("added condition for regexp %s", mime_atom->str); } /* Register new item in the cache */ if (mime_atom->d.re->type == RSPAMD_RE_HEADER || - mime_atom->d.re->type == RSPAMD_RE_RAWHEADER || - mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) { + mime_atom->d.re->type == RSPAMD_RE_RAWHEADER || + mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) { if (mime_atom->d.re->extra.header != 
NULL) { own_re = mime_atom->d.re->regexp; - mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache, - mime_atom->d.re->regexp, - mime_atom->d.re->type, - mime_atom->d.re->extra.header, - strlen (mime_atom->d.re->extra.header) + 1, - lua_cbref); + mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache, + mime_atom->d.re->regexp, + mime_atom->d.re->type, + mime_atom->d.re->extra.header, + strlen(mime_atom->d.re->extra.header) + 1, + lua_cbref); /* Pass ownership to the cache */ - rspamd_regexp_unref (own_re); + rspamd_regexp_unref(own_re); } else { /* We have header regexp, but no header name is detected */ - g_set_error (err, - rspamd_mime_expr_quark (), - 200, - "no header name in header regexp: '%s'", - mime_atom->str); - rspamd_regexp_unref (mime_atom->d.re->regexp); + g_set_error(err, + rspamd_mime_expr_quark(), + 200, + "no header name in header regexp: '%s'", + mime_atom->str); + rspamd_regexp_unref(mime_atom->d.re->regexp); goto err; } - } else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) { if (mime_atom->d.re->extra.selector != NULL) { own_re = mime_atom->d.re->regexp; - mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache, - mime_atom->d.re->regexp, - mime_atom->d.re->type, - mime_atom->d.re->extra.selector, - strlen (mime_atom->d.re->extra.selector) + 1, - lua_cbref); + mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache, + mime_atom->d.re->regexp, + mime_atom->d.re->type, + mime_atom->d.re->extra.selector, + strlen(mime_atom->d.re->extra.selector) + 1, + lua_cbref); /* Pass ownership to the cache */ - rspamd_regexp_unref (own_re); + rspamd_regexp_unref(own_re); } else { /* We have selector regexp, but no selector name is detected */ - g_set_error (err, - rspamd_mime_expr_quark (), - 200, - "no selector name in selector regexp: '%s'", - mime_atom->str); - rspamd_regexp_unref (mime_atom->d.re->regexp); + g_set_error(err, + rspamd_mime_expr_quark(), + 200, + "no selector name in selector regexp: '%s'", + mime_atom->str); + 
rspamd_regexp_unref(mime_atom->d.re->regexp); goto err; } } else { own_re = mime_atom->d.re->regexp; - mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache, - mime_atom->d.re->regexp, - mime_atom->d.re->type, - NULL, - 0, - lua_cbref); + mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache, + mime_atom->d.re->regexp, + mime_atom->d.re->type, + NULL, + 0, + lua_cbref); /* Pass ownership to the cache */ - rspamd_regexp_unref (own_re); + rspamd_regexp_unref(own_re); } } } else if (type == MIME_ATOM_LUA_FUNCTION) { mime_atom->d.lua_function = mime_atom->str; - lua_getglobal (cfg->lua_state, mime_atom->str); + lua_getglobal(cfg->lua_state, mime_atom->str); - if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) { - g_set_error (err, rspamd_mime_expr_quark(), 200, - "no such lua function '%s'", - mime_atom->str); - lua_pop (cfg->lua_state, 1); + if (lua_type(cfg->lua_state, -1) != LUA_TFUNCTION) { + g_set_error(err, rspamd_mime_expr_quark(), 200, + "no such lua function '%s'", + mime_atom->str); + lua_pop(cfg->lua_state, 1); goto err; } - lua_pop (cfg->lua_state, 1); + lua_pop(cfg->lua_state, 1); } else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) { /* p pointer is set to the start of Lua function name */ if (real_ud->conf_obj == NULL) { - g_set_error (err, rspamd_mime_expr_quark(), 300, - "no config object for '%s'", - mime_atom->str); + g_set_error(err, rspamd_mime_expr_quark(), 300, + "no config object for '%s'", + mime_atom->str); goto err; } - const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj, - "functions"); + const ucl_object_t *functions = ucl_object_lookup(real_ud->conf_obj, + "functions"); if (functions == NULL) { - g_set_error (err, rspamd_mime_expr_quark(), 310, - "no functions defined for '%s'", - mime_atom->str); + g_set_error(err, rspamd_mime_expr_quark(), 310, + "no functions defined for '%s'", + mime_atom->str); goto err; } - if (ucl_object_type (functions) != UCL_OBJECT) { - g_set_error (err, rspamd_mime_expr_quark(), 320, - 
"functions is not a table for '%s'", - mime_atom->str); + if (ucl_object_type(functions) != UCL_OBJECT) { + g_set_error(err, rspamd_mime_expr_quark(), 320, + "functions is not a table for '%s'", + mime_atom->str); goto err; } const ucl_object_t *function_obj; - function_obj = ucl_object_lookup_len (functions, c, - p - c); + function_obj = ucl_object_lookup_len(functions, c, + p - c); if (function_obj == NULL) { - g_set_error (err, rspamd_mime_expr_quark(), 320, - "function %.*s is not found for '%s'", - (int)(p - c), c, mime_atom->str); + g_set_error(err, rspamd_mime_expr_quark(), 320, + "function %.*s is not found for '%s'", + (int) (p - c), c, mime_atom->str); goto err; } - if (ucl_object_type (function_obj) != UCL_USERDATA) { - g_set_error (err, rspamd_mime_expr_quark(), 320, - "function %.*s has invalid type for '%s'", - (int)(p - c), c, mime_atom->str); + if (ucl_object_type(function_obj) != UCL_USERDATA) { + g_set_error(err, rspamd_mime_expr_quark(), 320, + "function %.*s has invalid type for '%s'", + (int) (p - c), c, mime_atom->str); goto err; } @@ -985,17 +986,17 @@ set: mime_atom->d.lua_cbref = fd->idx; } else { - mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool, - mime_atom->str); + mime_atom->d.func = rspamd_mime_expr_parse_function_atom(pool, + mime_atom->str); if (mime_atom->d.func == NULL) { - g_set_error (err, rspamd_mime_expr_quark(), 200, - "cannot parse function '%s'", - mime_atom->str); + g_set_error(err, rspamd_mime_expr_quark(), 200, + "cannot parse function '%s'", + mime_atom->str); goto err; } } - a = rspamd_mempool_alloc0 (pool, sizeof (*a)); + a = rspamd_mempool_alloc0(pool, sizeof(*a)); a->len = p - line; a->priority = 0; a->data = mime_atom; @@ -1008,45 +1009,45 @@ err: } static gint -rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, - struct rspamd_task *task) +rspamd_mime_expr_process_regexp(struct rspamd_regexp_atom *re, + struct rspamd_task *task) { gint ret; if (re == NULL) { - msg_info_task ("invalid 
regexp passed"); + msg_info_task("invalid regexp passed"); return 0; } if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) { - ret = rspamd_re_cache_process (task, - re->regexp, - re->type, - re->extra.header, - strlen (re->extra.header), - re->is_strong); + ret = rspamd_re_cache_process(task, + re->regexp, + re->type, + re->extra.header, + strlen(re->extra.header), + re->is_strong); } else if (re->type == RSPAMD_RE_SELECTOR) { - ret = rspamd_re_cache_process (task, - re->regexp, - re->type, - re->extra.selector, - strlen (re->extra.selector), - re->is_strong); + ret = rspamd_re_cache_process(task, + re->regexp, + re->type, + re->extra.selector, + strlen(re->extra.selector), + re->is_strong); } else { - ret = rspamd_re_cache_process (task, - re->regexp, - re->type, - NULL, - 0, - re->is_strong); + ret = rspamd_re_cache_process(task, + re->regexp, + re->type, + NULL, + 0, + re->is_strong); } if (re->is_test) { - msg_info_task ("test %s regexp '%s' returned %d", - rspamd_re_cache_type_to_string (re->type), - re->regexp_text, ret); + msg_info_task("test %s regexp '%s' returned %d", + rspamd_re_cache_type_to_string(re->type), + re->regexp_text, ret); } return ret; @@ -1054,7 +1055,7 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re, static gint -rspamd_mime_expr_priority (rspamd_expression_atom_t *atom) +rspamd_mime_expr_priority(rspamd_expression_atom_t *atom) { struct rspamd_mime_atom *mime_atom = atom->data; gint ret = 0; @@ -1099,146 +1100,145 @@ rspamd_mime_expr_priority (rspamd_expression_atom_t *atom) } static void -rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom) +rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom) { struct rspamd_mime_atom *mime_atom = atom->data; if (mime_atom) { if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) { /* Need to cleanup arguments */ - g_array_free (mime_atom->d.func->args, TRUE); + g_array_free(mime_atom->d.func->args, TRUE); } } } static gboolean -rspamd_mime_expr_process_function 
(struct rspamd_function_atom * func, - struct rspamd_task * task, - lua_State *L) +rspamd_mime_expr_process_function(struct rspamd_function_atom *func, + struct rspamd_task *task, + lua_State *L) { struct _fl *selected, key; key.name = func->name; - selected = bsearch (&key, - list_ptr, - functions_number, - sizeof (struct _fl), - fl_cmp); + selected = bsearch(&key, + list_ptr, + functions_number, + sizeof(struct _fl), + fl_cmp); if (selected == NULL) { /* Try to check lua function */ return FALSE; } - return selected->func (task, func->args, selected->user_data); + return selected->func(task, func->args, selected->user_data); } static gdouble -rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom) +rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom) { - struct rspamd_task *task = (struct rspamd_task *)ud; + struct rspamd_task *task = (struct rspamd_task *) ud; struct rspamd_mime_atom *mime_atom; lua_State *L; gdouble ret = 0; - g_assert (task != NULL); - g_assert (atom != NULL); + g_assert(task != NULL); + g_assert(atom != NULL); mime_atom = atom->data; if (mime_atom->type == MIME_ATOM_REGEXP) { - ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task); + ret = rspamd_mime_expr_process_regexp(mime_atom->d.re, task); } else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) { L = task->cfg->lua_state; - lua_getglobal (L, mime_atom->d.lua_function); - rspamd_lua_task_push (L, task); + lua_getglobal(L, mime_atom->d.lua_function); + rspamd_lua_task_push(L, task); - if (lua_pcall (L, 1, 1, 0) != 0) { - msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s", - mime_atom->d.lua_function, - mime_atom->str, - lua_tostring (L, -1)); - lua_pop (L, 1); + if (lua_pcall(L, 1, 1, 0) != 0) { + msg_info_task("lua call to global function '%s' for atom '%s' failed: %s", + mime_atom->d.lua_function, + mime_atom->str, + lua_tostring(L, -1)); + lua_pop(L, 1); } else { - if (lua_type (L, -1) == LUA_TBOOLEAN) { - ret = lua_toboolean (L, 
-1); + if (lua_type(L, -1) == LUA_TBOOLEAN) { + ret = lua_toboolean(L, -1); } - else if (lua_type (L, -1) == LUA_TNUMBER) { - ret = lua_tonumber (L, 1); + else if (lua_type(L, -1) == LUA_TNUMBER) { + ret = lua_tonumber(L, 1); } else { - msg_err_task ("%s returned wrong return type: %s", - mime_atom->str, lua_typename (L, lua_type (L, -1))); + msg_err_task("%s returned wrong return type: %s", + mime_atom->str, lua_typename(L, lua_type(L, -1))); } /* Remove result */ - lua_pop (L, 1); + lua_pop(L, 1); } } else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) { gint err_idx; L = task->cfg->lua_state; - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); - lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref); - rspamd_lua_task_push (L, task); + lua_rawgeti(L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref); + rspamd_lua_task_push(L, task); - if (lua_pcall (L, 1, 1, err_idx) != 0) { - msg_info_task ("lua call to local function for atom '%s' failed: %s", - mime_atom->str, - lua_tostring (L, -1)); + if (lua_pcall(L, 1, 1, err_idx) != 0) { + msg_info_task("lua call to local function for atom '%s' failed: %s", + mime_atom->str, + lua_tostring(L, -1)); } else { - if (lua_type (L, -1) == LUA_TBOOLEAN) { - ret = lua_toboolean (L, -1); + if (lua_type(L, -1) == LUA_TBOOLEAN) { + ret = lua_toboolean(L, -1); } - else if (lua_type (L, -1) == LUA_TNUMBER) { - ret = lua_tonumber (L, 1); + else if (lua_type(L, -1) == LUA_TNUMBER) { + ret = lua_tonumber(L, 1); } else { - msg_err_task ("%s returned wrong return type: %s", - mime_atom->str, lua_typename (L, lua_type (L, -1))); + msg_err_task("%s returned wrong return type: %s", + mime_atom->str, lua_typename(L, lua_type(L, -1))); } } - lua_settop (L, 0); + lua_settop(L, 0); } else { - ret = rspamd_mime_expr_process_function (mime_atom->d.func, task, - task->cfg->lua_state); + ret = rspamd_mime_expr_process_function(mime_atom->d.func, 
task, + task->cfg->lua_state); } return ret; } -void -register_expression_function (const gchar *name, - rspamd_internal_func_t func, - void *user_data) +void register_expression_function(const gchar *name, + rspamd_internal_func_t func, + void *user_data) { static struct _fl *new; functions_number++; - new = g_new (struct _fl, functions_number); - memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); + new = g_new(struct _fl, functions_number); + memcpy(new, list_ptr, (functions_number - 1) * sizeof(struct _fl)); if (list_allocated) { - g_free (list_ptr); + g_free(list_ptr); } list_allocated = TRUE; new[functions_number - 1].name = name; new[functions_number - 1].func = func; new[functions_number - 1].user_data = user_data; - qsort (new, functions_number, sizeof (struct _fl), fl_cmp); + qsort(new, functions_number, sizeof(struct _fl), fl_cmp); list_ptr = new; } gboolean -rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused) +rspamd_compare_encoding(struct rspamd_task *task, GArray *args, void *unused) { struct expression_argument *arg; @@ -1246,9 +1246,9 @@ rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused) return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } @@ -1257,7 +1257,7 @@ rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused) } gboolean -rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused) +rspamd_header_exists(struct rspamd_task *task, GArray *args, void *unused) { struct expression_argument *arg; struct rspamd_mime_header *rh; @@ -1266,17 +1266,17 @@ rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused) return FALSE; } - arg = 
&g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } rh = rspamd_message_get_header_array(task, - (gchar *) arg->data, FALSE); + (gchar *) arg->data, FALSE); - debug_task ("try to get header %s: %d", (gchar *)arg->data, - (rh != NULL)); + debug_task("try to get header %s: %d", (gchar *) arg->data, + (rh != NULL)); if (rh) { return TRUE; @@ -1293,56 +1293,56 @@ rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused) * and return FALSE otherwise. */ gboolean -rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused) +rspamd_parts_distance(struct rspamd_task *task, GArray *args, void *unused) { gint threshold, threshold2 = -1; struct expression_argument *arg; gdouble *pdiff, diff; if (args == NULL || args->len == 0) { - debug_task ("no threshold is specified, assume it 100"); + debug_task("no threshold is specified, assume it 100"); threshold = 100; } else { errno = 0; - arg = &g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } - threshold = strtoul ((gchar *)arg->data, NULL, 10); + threshold = strtoul((gchar *) arg->data, NULL, 10); if (errno != 0) { - msg_info_task ("bad numeric value for threshold \"%s\", assume it 100", - (gchar *)arg->data); + msg_info_task("bad numeric value for threshold \"%s\", assume it 100", + (gchar *) arg->data); threshold = 100; } if (args->len >= 2) { - arg = &g_array_index (args, struct expression_argument, 1); + arg = &g_array_index(args, struct expression_argument, 1); if (!arg || arg->type != 
EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } errno = 0; - threshold2 = strtoul ((gchar *)arg->data, NULL, 10); + threshold2 = strtoul((gchar *) arg->data, NULL, 10); if (errno != 0) { - msg_info_task ("bad numeric value for threshold \"%s\", ignore it", - (gchar *)arg->data); + msg_info_task("bad numeric value for threshold \"%s\", ignore it", + (gchar *) arg->data); threshold2 = -1; } } } if ((pdiff = - rspamd_mempool_get_variable (task->task_pool, - "parts_distance")) != NULL) { + rspamd_mempool_get_variable(task->task_pool, + "parts_distance")) != NULL) { diff = (1.0 - (*pdiff)) * 100.0; if (diff != -1) { if (threshold2 > 0) { - if (diff >= MIN (threshold, threshold2) && - diff < MAX (threshold, threshold2)) { + if (diff >= MIN(threshold, threshold2) && + diff < MAX(threshold, threshold2)) { return TRUE; } @@ -1370,24 +1370,24 @@ struct addr_list { }; static gint -addr_list_cmp_func (const void *a, const void *b) +addr_list_cmp_func(const void *a, const void *b) { - const struct addr_list *addra = (struct addr_list *)a, - *addrb = (struct addr_list *)b; + const struct addr_list *addra = (struct addr_list *) a, + *addrb = (struct addr_list *) b; if (addra->addrlen != addrb->addrlen) { return addra->addrlen - addrb->addrlen; } - return memcmp (addra->addr, addrb->addr, addra->addrlen); + return memcmp(addra->addr, addrb->addr, addra->addrlen); } #define COMPARE_RCPT_LEN 3 #define MIN_RCPT_TO_COMPARE 7 gboolean -rspamd_recipients_distance (struct rspamd_task *task, GArray * args, - void *unused) +rspamd_recipients_distance(struct rspamd_task *task, GArray *args, + void *unused) { struct expression_argument *arg; struct rspamd_email_address *cur; @@ -1396,64 +1396,65 @@ rspamd_recipients_distance (struct rspamd_task *task, GArray * args, gint num, i, hits = 0; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no 
parameters to function"); return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } errno = 0; - threshold = strtod ((gchar *)arg->data, NULL); + threshold = strtod((gchar *) arg->data, NULL); if (errno != 0) { - msg_warn_task ("invalid numeric value '%s': %s", - (gchar *)arg->data, - strerror (errno)); + msg_warn_task("invalid numeric value '%s': %s", + (gchar *) arg->data, + strerror(errno)); return FALSE; } - if (!MESSAGE_FIELD (task, rcpt_mime)) { + if (!MESSAGE_FIELD(task, rcpt_mime)) { return FALSE; } - num = MESSAGE_FIELD (task, rcpt_mime)->len; + num = MESSAGE_FIELD(task, rcpt_mime)->len; if (num < MIN_RCPT_TO_COMPARE) { return FALSE; } - ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list)); + ar = rspamd_mempool_alloc0(task->task_pool, num * sizeof(struct addr_list)); /* Fill array */ num = 0; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, rcpt_mime), i, cur) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, rcpt_mime), i, cur) + { if (cur->addr_len > COMPARE_RCPT_LEN) { ar[num].name = cur->addr; ar[num].namelen = cur->addr_len; ar[num].addr = cur->domain; ar[num].addrlen = cur->domain_len; - num ++; + num++; } } - qsort (ar, num, sizeof (*ar), addr_list_cmp_func); + qsort(ar, num, sizeof(*ar), addr_list_cmp_func); /* Cycle all elements in array */ for (i = 0; i < num; i++) { if (i < num - 1) { if (ar[i].namelen == ar[i + 1].namelen) { - if (rspamd_lc_cmp (ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) { + if (rspamd_lc_cmp(ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) { hits++; } } } } - if ((hits * num / 2.) / (double)num >= threshold) { + if ((hits * num / 2.) 
/ (double) num >= threshold) { return TRUE; } @@ -1461,17 +1462,18 @@ rspamd_recipients_distance (struct rspamd_task *task, GArray * args, } gboolean -rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, - void *unused) +rspamd_has_only_html_part(struct rspamd_task *task, GArray *args, + void *unused) { struct rspamd_mime_text_part *p; guint i, cnt_html = 0, cnt_txt = 0; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { - p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0); + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p) + { + p = g_ptr_array_index(MESSAGE_FIELD(task, text_parts), 0); - if (!IS_TEXT_PART_ATTACHMENT (p)) { - if (IS_TEXT_PART_HTML (p)) { + if (!IS_TEXT_PART_ATTACHMENT(p)) { + if (IS_TEXT_PART_HTML(p)) { cnt_html++; } else { @@ -1484,7 +1486,7 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args, } static gboolean -is_recipient_list_sorted (GPtrArray *ar) +is_recipient_list_sorted(GPtrArray *ar) { struct rspamd_email_address *addr; gboolean res = TRUE; @@ -1499,12 +1501,13 @@ is_recipient_list_sorted (GPtrArray *ar) prev.len = 0; prev.begin = NULL; - PTR_ARRAY_FOREACH (ar, i, addr) { + PTR_ARRAY_FOREACH(ar, i, addr) + { cur.begin = addr->addr; cur.len = addr->addr_len; if (prev.len != 0) { - if (rspamd_ftok_casecmp (&cur, &prev) <= 0) { + if (rspamd_ftok_casecmp(&cur, &prev) <= 0) { res = FALSE; break; } @@ -1517,23 +1520,23 @@ is_recipient_list_sorted (GPtrArray *ar) } gboolean -rspamd_is_recipients_sorted (struct rspamd_task * task, - GArray * args, - void *unused) +rspamd_is_recipients_sorted(struct rspamd_task *task, + GArray *args, + void *unused) { /* Check all types of addresses */ - if (MESSAGE_FIELD (task, rcpt_mime)) { - return is_recipient_list_sorted (MESSAGE_FIELD (task, rcpt_mime)); + if (MESSAGE_FIELD(task, rcpt_mime)) { + return is_recipient_list_sorted(MESSAGE_FIELD(task, rcpt_mime)); } return FALSE; } gboolean -rspamd_compare_transfer_encoding (struct rspamd_task * 
task, - GArray * args, - void *unused) +rspamd_compare_transfer_encoding(struct rspamd_task *task, + GArray *args, + void *unused) { struct expression_argument *arg; guint i; @@ -1541,25 +1544,26 @@ rspamd_compare_transfer_encoding (struct rspamd_task * task, enum rspamd_cte cte; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } - cte = rspamd_cte_from_string (arg->data); + cte = rspamd_cte_from_string(arg->data); if (cte == RSPAMD_CTE_UNKNOWN) { - msg_warn_task ("unknown cte: %s", arg->data); + msg_warn_task("unknown cte: %s", arg->data); return FALSE; } - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { - if (IS_PART_TEXT (part)) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { + if (IS_PART_TEXT(part)) { if (part->cte == cte) { return TRUE; } @@ -1570,14 +1574,14 @@ rspamd_compare_transfer_encoding (struct rspamd_task * task, } gboolean -rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused) +rspamd_is_html_balanced(struct rspamd_task *task, GArray *args, void *unused) { /* Totally broken but seems to be never used */ return TRUE; } gboolean -rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) +rspamd_has_html_tag(struct rspamd_task *task, GArray *args, void *unused) { struct rspamd_mime_text_part *p; struct expression_argument *arg; @@ -1585,19 +1589,20 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) gboolean res = FALSE; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - arg = &g_array_index 
(args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is passed"); return FALSE; } - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { - if (IS_TEXT_PART_HTML (p) && p->html) { - res = rspamd_html_tag_seen (p->html, arg->data); + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p) + { + if (IS_TEXT_PART_HTML(p) && p->html) { + res = rspamd_html_tag_seen(p->html, arg->data); } if (res) { @@ -1606,18 +1611,18 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused) } return res; - } gboolean -rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) +rspamd_has_fake_html(struct rspamd_task *task, GArray *args, void *unused) { struct rspamd_mime_text_part *p; guint i; gboolean res = FALSE; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) { - if (IS_TEXT_PART_HTML (p) && (rspamd_html_get_tags_count(p->html) < 2)) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p) + { + if (IS_TEXT_PART_HTML(p) && (rspamd_html_get_tags_count(p->html) < 2)) { res = TRUE; } @@ -1627,11 +1632,10 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused) } return res; - } static gboolean -rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused) +rspamd_raw_header_exists(struct rspamd_task *task, GArray *args, void *unused) { struct expression_argument *arg; @@ -1639,9 +1643,9 @@ rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused) return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid argument to function is passed"); + msg_warn_task("invalid argument to function is 
passed"); return FALSE; } @@ -1649,9 +1653,9 @@ rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused) } static gboolean -match_smtp_data (struct rspamd_task *task, - struct expression_argument *arg, - const gchar *what, gsize len) +match_smtp_data(struct rspamd_task *task, + struct expression_argument *arg, + const gchar *what, gsize len) { rspamd_regexp_t *re; gint r = 0; @@ -1660,19 +1664,19 @@ match_smtp_data (struct rspamd_task *task, /* This is a regexp */ re = arg->data; if (re == NULL) { - msg_warn_task ("cannot compile regexp for function"); + msg_warn_task("cannot compile regexp for function"); return FALSE; } if (len > 0) { - r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL); + r = rspamd_regexp_search(re, what, len, NULL, NULL, FALSE, NULL); } return r; } else if (arg->type == EXPRESSION_ARGUMENT_NORMAL && - g_ascii_strncasecmp (arg->data, what, len) == 0) { + g_ascii_strncasecmp(arg->data, what, len) == 0) { return TRUE; } @@ -1680,7 +1684,7 @@ match_smtp_data (struct rspamd_task *task, } static gboolean -rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) +rspamd_check_smtp_data(struct rspamd_task *task, GArray *args, void *unused) { struct expression_argument *arg; struct rspamd_email_address *addr = NULL; @@ -1689,14 +1693,14 @@ rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) guint i; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); + arg = &g_array_index(args, struct expression_argument, 0); if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } else { @@ -1704,56 +1708,56 @@ rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) switch (*type) { case 'f': case 'F': 
- if (g_ascii_strcasecmp (type, "from") == 0) { - addr = rspamd_task_get_sender (task); + if (g_ascii_strcasecmp(type, "from") == 0) { + addr = rspamd_task_get_sender(task); } else { - msg_warn_task ("bad argument to function: %s", type); + msg_warn_task("bad argument to function: %s", type); return FALSE; } break; case 'h': case 'H': - if (g_ascii_strcasecmp (type, "helo") == 0) { + if (g_ascii_strcasecmp(type, "helo") == 0) { str = task->helo; } else { - msg_warn_task ("bad argument to function: %s", type); + msg_warn_task("bad argument to function: %s", type); return FALSE; } break; case 'u': case 'U': - if (g_ascii_strcasecmp (type, "user") == 0) { + if (g_ascii_strcasecmp(type, "user") == 0) { str = task->auth_user; } else { - msg_warn_task ("bad argument to function: %s", type); + msg_warn_task("bad argument to function: %s", type); return FALSE; } break; case 's': case 'S': - if (g_ascii_strcasecmp (type, "subject") == 0) { - str = MESSAGE_FIELD (task, subject); + if (g_ascii_strcasecmp(type, "subject") == 0) { + str = MESSAGE_FIELD(task, subject); } else { - msg_warn_task ("bad argument to function: %s", type); + msg_warn_task("bad argument to function: %s", type); return FALSE; } break; case 'r': case 'R': - if (g_ascii_strcasecmp (type, "rcpt") == 0) { + if (g_ascii_strcasecmp(type, "rcpt") == 0) { rcpts = task->rcpt_envelope; } else { - msg_warn_task ("bad argument to function: %s", type); + msg_warn_task("bad argument to function: %s", type); return FALSE; } break; default: - msg_warn_task ("bad argument to function: %s", type); + msg_warn_task("bad argument to function: %s", type); return FALSE; } } @@ -1765,23 +1769,23 @@ rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) /* We would process only one more argument, others are ignored */ if (args->len >= 2) { - arg = &g_array_index (args, struct expression_argument, 1); + arg = &g_array_index(args, struct expression_argument, 1); if (arg) { if (str != NULL) { - return 
match_smtp_data (task, arg, str, strlen (str)); + return match_smtp_data(task, arg, str, strlen(str)); } else if (addr != NULL && addr->addr) { - return match_smtp_data (task, arg, addr->addr, addr->addr_len); + return match_smtp_data(task, arg, addr->addr, addr->addr_len); } else { if (rcpts != NULL) { - for (i = 0; i < rcpts->len; i ++) { - addr = g_ptr_array_index (rcpts, i); + for (i = 0; i < rcpts->len; i++) { + addr = g_ptr_array_index(rcpts, i); if (addr && addr->addr && - match_smtp_data (task, arg, - addr->addr, addr->addr_len)) { + match_smtp_data(task, arg, + addr->addr, addr->addr_len)) { return TRUE; } } @@ -1794,8 +1798,8 @@ rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused) } static inline gboolean -rspamd_check_ct_attr (const gchar *begin, gsize len, - struct expression_argument *arg_pattern) +rspamd_check_ct_attr(const gchar *begin, gsize len, + struct expression_argument *arg_pattern) { rspamd_regexp_t *re; gboolean r = FALSE; @@ -1804,9 +1808,9 @@ rspamd_check_ct_attr (const gchar *begin, gsize len, re = arg_pattern->data; if (len > 0) { - r = rspamd_regexp_search (re, - begin, len, - NULL, NULL, FALSE, NULL); + r = rspamd_regexp_search(re, + begin, len, + NULL, NULL, FALSE, NULL); } if (r) { @@ -1815,10 +1819,10 @@ rspamd_check_ct_attr (const gchar *begin, gsize len, } else { /* Just do strcasecmp */ - gsize plen = strlen (arg_pattern->data); + gsize plen = strlen(arg_pattern->data); if (plen == len && - g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) { + g_ascii_strncasecmp(arg_pattern->data, begin, len) == 0) { return TRUE; } } @@ -1827,9 +1831,9 @@ rspamd_check_ct_attr (const gchar *begin, gsize len, } static gboolean -rspamd_content_type_compare_param (struct rspamd_task * task, - GArray * args, - void *unused) +rspamd_content_type_compare_param(struct rspamd_task *task, + GArray *args, + void *unused) { struct expression_argument *arg, *arg1, *arg_pattern; @@ -1841,20 +1845,21 @@ 
rspamd_content_type_compare_param (struct rspamd_task * task, const gchar *param_name; if (args == NULL || args->len < 2) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); - g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + arg = &g_array_index(args, struct expression_argument, 0); + g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL); param_name = arg->data; - arg_pattern = &g_array_index (args, struct expression_argument, 1); + arg_pattern = &g_array_index(args, struct expression_argument, 1); - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part) + { if (args->len >= 3) { - arg1 = &g_array_index (args, struct expression_argument, 2); - if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { + arg1 = &g_array_index(args, struct expression_argument, 2); + if (g_ascii_strncasecmp(arg1->data, "true", + sizeof("true") - 1) == 0) { recursive = TRUE; } } @@ -1863,37 +1868,38 @@ rspamd_content_type_compare_param (struct rspamd_task * task, * If user did not specify argument, let's assume that he wants * recursive search if mime part is multipart/mixed */ - if (IS_PART_MULTIPART (cur_part)) { + if (IS_PART_MULTIPART(cur_part)) { recursive = TRUE; } } rspamd_ftok_t lit; - RSPAMD_FTOK_FROM_STR (&srch, param_name); - RSPAMD_FTOK_FROM_STR (&lit, "charset"); + RSPAMD_FTOK_FROM_STR(&srch, param_name); + RSPAMD_FTOK_FROM_STR(&lit, "charset"); - if (rspamd_ftok_equal (&srch, &lit)) { - if (rspamd_check_ct_attr (cur_part->ct->charset.begin, - cur_part->ct->charset.len, arg_pattern)) { + if (rspamd_ftok_equal(&srch, &lit)) { + if (rspamd_check_ct_attr(cur_part->ct->charset.begin, + cur_part->ct->charset.len, arg_pattern)) { return TRUE; } } - RSPAMD_FTOK_FROM_STR (&lit, "boundary"); - if (rspamd_ftok_equal (&srch, &lit)) { - if (rspamd_check_ct_attr 
(cur_part->ct->orig_boundary.begin, - cur_part->ct->orig_boundary.len, arg_pattern)) { + RSPAMD_FTOK_FROM_STR(&lit, "boundary"); + if (rspamd_ftok_equal(&srch, &lit)) { + if (rspamd_check_ct_attr(cur_part->ct->orig_boundary.begin, + cur_part->ct->orig_boundary.len, arg_pattern)) { return TRUE; } } if (cur_part->ct->attrs) { - found = g_hash_table_lookup (cur_part->ct->attrs, &srch); + found = g_hash_table_lookup(cur_part->ct->attrs, &srch); if (found) { - DL_FOREACH (found, cur) { - if (rspamd_check_ct_attr (cur->value.begin, - cur->value.len, arg_pattern)) { + DL_FOREACH(found, cur) + { + if (rspamd_check_ct_attr(cur->value.begin, + cur->value.len, arg_pattern)) { return TRUE; } } @@ -1909,9 +1915,9 @@ rspamd_content_type_compare_param (struct rspamd_task * task, } static gboolean -rspamd_content_type_has_param (struct rspamd_task * task, - GArray * args, - void *unused) +rspamd_content_type_has_param(struct rspamd_task *task, + GArray *args, + void *unused) { struct expression_argument *arg, *arg1; gboolean recursive = FALSE; @@ -1922,19 +1928,20 @@ rspamd_content_type_has_param (struct rspamd_task * task, const gchar *param_name; if (args == NULL || args->len < 1) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - arg = &g_array_index (args, struct expression_argument, 0); - g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + arg = &g_array_index(args, struct expression_argument, 0); + g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL); param_name = arg->data; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part) + { if (args->len >= 2) { - arg1 = &g_array_index (args, struct expression_argument, 1); - if (g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { + arg1 = &g_array_index(args, struct expression_argument, 1); + if (g_ascii_strncasecmp(arg1->data, "true", + sizeof("true") - 1) == 0) { recursive = TRUE; 
} } @@ -1943,31 +1950,31 @@ rspamd_content_type_has_param (struct rspamd_task * task, * If user did not specify argument, let's assume that he wants * recursive search if mime part is multipart/mixed */ - if (IS_PART_MULTIPART (cur_part)) { + if (IS_PART_MULTIPART(cur_part)) { recursive = TRUE; } } rspamd_ftok_t lit; - RSPAMD_FTOK_FROM_STR (&srch, param_name); - RSPAMD_FTOK_FROM_STR (&lit, "charset"); + RSPAMD_FTOK_FROM_STR(&srch, param_name); + RSPAMD_FTOK_FROM_STR(&lit, "charset"); - if (rspamd_ftok_equal (&srch, &lit)) { + if (rspamd_ftok_equal(&srch, &lit)) { if (cur_part->ct->charset.len > 0) { return TRUE; } } - RSPAMD_FTOK_FROM_STR (&lit, "boundary"); - if (rspamd_ftok_equal (&srch, &lit)) { + RSPAMD_FTOK_FROM_STR(&lit, "boundary"); + if (rspamd_ftok_equal(&srch, &lit)) { if (cur_part->ct->boundary.len > 0) { return TRUE; } } if (cur_part->ct->attrs) { - found = g_hash_table_lookup (cur_part->ct->attrs, &srch); + found = g_hash_table_lookup(cur_part->ct->attrs, &srch); if (found) { return TRUE; @@ -1983,9 +1990,9 @@ rspamd_content_type_has_param (struct rspamd_task * task, } static gboolean -rspamd_content_type_check (struct rspamd_task *task, - GArray * args, - gboolean check_subtype) +rspamd_content_type_check(struct rspamd_task *task, + GArray *args, + gboolean check_subtype) { rspamd_ftok_t *param_data, srch; rspamd_regexp_t *re; @@ -1997,19 +2004,20 @@ rspamd_content_type_check (struct rspamd_task *task, struct rspamd_mime_part *cur_part; if (args == NULL || args->len < 1) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - arg_pattern = &g_array_index (args, struct expression_argument, 0); + arg_pattern = &g_array_index(args, struct expression_argument, 0); - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part) + { ct = cur_part->ct; if (args->len >= 2) { - arg1 = &g_array_index (args, struct expression_argument, 1); - if 
(g_ascii_strncasecmp (arg1->data, "true", - sizeof ("true") - 1) == 0) { + arg1 = &g_array_index(args, struct expression_argument, 1); + if (g_ascii_strncasecmp(arg1->data, "true", + sizeof("true") - 1) == 0) { recursive = TRUE; } } @@ -2018,7 +2026,7 @@ rspamd_content_type_check (struct rspamd_task *task, * If user did not specify argument, let's assume that he wants * recursive search if mime part is multipart/mixed */ - if (IS_PART_MULTIPART (cur_part)) { + if (IS_PART_MULTIPART(cur_part)) { recursive = TRUE; } } @@ -2034,8 +2042,8 @@ rspamd_content_type_check (struct rspamd_task *task, re = arg_pattern->data; if (param_data->len > 0) { - r = rspamd_regexp_search (re, param_data->begin, param_data->len, - NULL, NULL, FALSE, NULL); + r = rspamd_regexp_search(re, param_data->begin, param_data->len, + NULL, NULL, FALSE, NULL); } if (r) { @@ -2045,9 +2053,9 @@ rspamd_content_type_check (struct rspamd_task *task, else { /* Just do strcasecmp */ srch.begin = arg_pattern->data; - srch.len = strlen (arg_pattern->data); + srch.len = strlen(arg_pattern->data); - if (rspamd_ftok_casecmp (param_data, &srch) == 0) { + if (rspamd_ftok_casecmp(param_data, &srch) == 0) { return TRUE; } } @@ -2062,47 +2070,47 @@ rspamd_content_type_check (struct rspamd_task *task, } static gboolean -rspamd_content_type_is_type (struct rspamd_task * task, - GArray * args, - void *unused) +rspamd_content_type_is_type(struct rspamd_task *task, + GArray *args, + void *unused) { - return rspamd_content_type_check (task, args, FALSE); + return rspamd_content_type_check(task, args, FALSE); } static gboolean -rspamd_content_type_is_subtype (struct rspamd_task * task, - GArray * args, - void *unused) +rspamd_content_type_is_subtype(struct rspamd_task *task, + GArray *args, + void *unused) { - return rspamd_content_type_check (task, args, TRUE); + return rspamd_content_type_check(task, args, TRUE); } static gboolean -compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct, - struct 
expression_argument *subtype) +compare_subtype(struct rspamd_task *task, struct rspamd_content_type *ct, + struct expression_argument *subtype) { rspamd_regexp_t *re; rspamd_ftok_t srch; gint r = 0; if (subtype == NULL || ct == NULL) { - msg_warn_task ("invalid parameters passed"); + msg_warn_task("invalid parameters passed"); return FALSE; } if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) { re = subtype->data; if (ct->subtype.len > 0) { - r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len, - NULL, NULL, FALSE, NULL); + r = rspamd_regexp_search(re, ct->subtype.begin, ct->subtype.len, + NULL, NULL, FALSE, NULL); } } else { srch.begin = subtype->data; - srch.len = strlen (subtype->data); + srch.len = strlen(subtype->data); /* Just do strcasecmp */ - if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) { + if (rspamd_ftok_casecmp(&ct->subtype, &srch) == 0) { return TRUE; } } @@ -2111,7 +2119,7 @@ compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct, } static gboolean -compare_len (struct rspamd_mime_part *part, guint min, guint max) +compare_len(struct rspamd_mime_part *part, guint min, guint max) { if (min == 0 && max == 0) { return TRUE; @@ -2129,11 +2137,11 @@ compare_len (struct rspamd_mime_part *part, guint min, guint max) } static gboolean -common_has_content_part (struct rspamd_task * task, - struct expression_argument *param_type, - struct expression_argument *param_subtype, - gint min_len, - gint max_len) +common_has_content_part(struct rspamd_task *task, + struct expression_argument *param_type, + struct expression_argument *param_subtype, + gint min_len, + gint max_len) { rspamd_regexp_t *re; struct rspamd_mime_part *part; @@ -2142,7 +2150,8 @@ common_has_content_part (struct rspamd_task * task, gint r = 0; guint i; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { ct = part->ct; if (ct == NULL) { @@ -2153,14 +2162,14 @@ common_has_content_part (struct 
rspamd_task * task, re = param_type->data; if (ct->type.len > 0) { - r = rspamd_regexp_search (re, ct->type.begin, ct->type.len, - NULL, NULL, FALSE, NULL); + r = rspamd_regexp_search(re, ct->type.begin, ct->type.len, + NULL, NULL, FALSE, NULL); } /* Also check subtype and length of the part */ if (r && param_subtype) { - r = compare_len (part, min_len, max_len) && - compare_subtype (task, ct, param_subtype); + r = compare_len(part, min_len, max_len) && + compare_subtype(task, ct, param_subtype); return r; } @@ -2168,18 +2177,18 @@ common_has_content_part (struct rspamd_task * task, else { /* Just do strcasecmp */ srch.begin = param_type->data; - srch.len = strlen (param_type->data); + srch.len = strlen(param_type->data); - if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) { + if (rspamd_ftok_casecmp(&ct->type, &srch) == 0) { if (param_subtype) { - if (compare_subtype (task, ct, param_subtype)) { - if (compare_len (part, min_len, max_len)) { + if (compare_subtype(task, ct, param_subtype)) { + if (compare_len(part, min_len, max_len)) { return TRUE; } } } else { - if (compare_len (part, min_len, max_len)) { + if (compare_len(part, min_len, max_len)) { return TRUE; } } @@ -2191,82 +2200,83 @@ common_has_content_part (struct rspamd_task * task, } static gboolean -rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused) +rspamd_has_content_part(struct rspamd_task *task, GArray *args, void *unused) { struct expression_argument *param_type = NULL, *param_subtype = NULL; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - param_type = &g_array_index (args, struct expression_argument, 0); + param_type = &g_array_index(args, struct expression_argument, 0); if (args->len >= 2) { - param_subtype = &g_array_index (args, struct expression_argument, 1); + param_subtype = &g_array_index(args, struct expression_argument, 1); } - return common_has_content_part (task, param_type, 
param_subtype, 0, 0); + return common_has_content_part(task, param_type, param_subtype, 0, 0); } static gboolean -rspamd_has_content_part_len (struct rspamd_task * task, - GArray * args, - void *unused) +rspamd_has_content_part_len(struct rspamd_task *task, + GArray *args, + void *unused) { struct expression_argument *param_type = NULL, *param_subtype = NULL; gint min = 0, max = 0; struct expression_argument *arg; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - param_type = &g_array_index (args, struct expression_argument, 0); + param_type = &g_array_index(args, struct expression_argument, 0); if (args->len >= 2) { - param_subtype = &g_array_index (args, struct expression_argument, 1); + param_subtype = &g_array_index(args, struct expression_argument, 1); if (args->len >= 3) { - arg = &g_array_index (args, struct expression_argument, 2); + arg = &g_array_index(args, struct expression_argument, 2); errno = 0; - min = strtoul (arg->data, NULL, 10); - g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); + min = strtoul(arg->data, NULL, 10); + g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL); if (errno != 0) { - msg_warn_task ("invalid numeric value '%s': %s", - (gchar *)arg->data, - strerror (errno)); + msg_warn_task("invalid numeric value '%s': %s", + (gchar *) arg->data, + strerror(errno)); return FALSE; } if (args->len >= 4) { - arg = &g_array_index (args, struct expression_argument, 3); - g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL); - max = strtoul (arg->data, NULL, 10); + arg = &g_array_index(args, struct expression_argument, 3); + g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL); + max = strtoul(arg->data, NULL, 10); if (errno != 0) { - msg_warn_task ("invalid numeric value '%s': %s", - (gchar *)arg->data, - strerror (errno)); + msg_warn_task("invalid numeric value '%s': %s", + (gchar *) arg->data, + strerror(errno)); return FALSE; } } } } - return common_has_content_part 
(task, param_type, param_subtype, min, max); + return common_has_content_part(task, param_type, param_subtype, min, max); } static gboolean -rspamd_is_empty_body (struct rspamd_task *task, - GArray * args, - void *unused) +rspamd_is_empty_body(struct rspamd_task *task, + GArray *args, + void *unused) { struct rspamd_mime_part *part; guint i; - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { if (part->parsed_data.len > 0) { return FALSE; } @@ -2275,72 +2285,76 @@ rspamd_is_empty_body (struct rspamd_task *task, return TRUE; } -#define TASK_FLAG_READ(flag) do { \ - result = !!(task->flags & (flag)); \ -} while(0) - -#define TASK_GET_FLAG(flag, strname, macro) do { \ - if (!found && strcmp ((flag), strname) == 0) { \ - TASK_FLAG_READ((macro)); \ - found = TRUE; \ - } \ -} while(0) - -#define TASK_PROTOCOL_FLAG_READ(flag) do { \ - result = !!(task->protocol_flags & (flag)); \ -} while(0) - -#define TASK_GET_PROTOCOL_FLAG(flag, strname, macro) do { \ - if (!found && strcmp ((flag), strname) == 0) { \ - TASK_PROTOCOL_FLAG_READ((macro)); \ - found = TRUE; \ - } \ -} while(0) +#define TASK_FLAG_READ(flag) \ + do { \ + result = !!(task->flags & (flag)); \ + } while (0) + +#define TASK_GET_FLAG(flag, strname, macro) \ + do { \ + if (!found && strcmp((flag), strname) == 0) { \ + TASK_FLAG_READ((macro)); \ + found = TRUE; \ + } \ + } while (0) + +#define TASK_PROTOCOL_FLAG_READ(flag) \ + do { \ + result = !!(task->protocol_flags & (flag)); \ + } while (0) + +#define TASK_GET_PROTOCOL_FLAG(flag, strname, macro) \ + do { \ + if (!found && strcmp((flag), strname) == 0) { \ + TASK_PROTOCOL_FLAG_READ((macro)); \ + found = TRUE; \ + } \ + } while (0) static gboolean -rspamd_has_flag_expr (struct rspamd_task *task, - GArray * args, - void *unused) +rspamd_has_flag_expr(struct rspamd_task *task, + GArray *args, + void *unused) { gboolean found = FALSE, result = FALSE; struct expression_argument *flag_arg; const 
gchar *flag_str; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - flag_arg = &g_array_index (args, struct expression_argument, 0); + flag_arg = &g_array_index(args, struct expression_argument, 0); if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid parameter to function"); + msg_warn_task("invalid parameter to function"); return FALSE; } - flag_str = (const gchar *)flag_arg->data; - - TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL); - TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG); - TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT); - TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP); - TASK_GET_PROTOCOL_FLAG (flag_str, "extended_urls", - RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS); - TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM); - TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM); - TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED); - TASK_GET_FLAG (flag_str, "broken_headers", - RSPAMD_TASK_FLAG_BROKEN_HEADERS); - TASK_GET_FLAG (flag_str, "skip_process", - RSPAMD_TASK_FLAG_SKIP_PROCESS); - TASK_GET_PROTOCOL_FLAG (flag_str, "milter", - RSPAMD_TASK_PROTOCOL_FLAG_MILTER); - TASK_GET_FLAG (flag_str, "bad_unicode", - RSPAMD_TASK_FLAG_BAD_UNICODE); + flag_str = (const gchar *) flag_arg->data; + + TASK_GET_FLAG(flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL); + TASK_GET_FLAG(flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG); + TASK_GET_FLAG(flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT); + TASK_GET_FLAG(flag_str, "skip", RSPAMD_TASK_FLAG_SKIP); + TASK_GET_PROTOCOL_FLAG(flag_str, "extended_urls", + RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS); + TASK_GET_FLAG(flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM); + TASK_GET_FLAG(flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM); + TASK_GET_FLAG(flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED); + TASK_GET_FLAG(flag_str, 
"broken_headers", + RSPAMD_TASK_FLAG_BROKEN_HEADERS); + TASK_GET_FLAG(flag_str, "skip_process", + RSPAMD_TASK_FLAG_SKIP_PROCESS); + TASK_GET_PROTOCOL_FLAG(flag_str, "milter", + RSPAMD_TASK_PROTOCOL_FLAG_MILTER); + TASK_GET_FLAG(flag_str, "bad_unicode", + RSPAMD_TASK_FLAG_BAD_UNICODE); if (!found) { - msg_warn_task ("invalid flag name %s", flag_str); + msg_warn_task("invalid flag name %s", flag_str); return FALSE; } @@ -2348,28 +2362,28 @@ rspamd_has_flag_expr (struct rspamd_task *task, } static gboolean -rspamd_has_symbol_expr (struct rspamd_task *task, - GArray * args, - void *unused) +rspamd_has_symbol_expr(struct rspamd_task *task, + GArray *args, + void *unused) { struct expression_argument *sym_arg; const gchar *symbol_str; if (args == NULL) { - msg_warn_task ("no parameters to function"); + msg_warn_task("no parameters to function"); return FALSE; } - sym_arg = &g_array_index (args, struct expression_argument, 0); + sym_arg = &g_array_index(args, struct expression_argument, 0); if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) { - msg_warn_task ("invalid parameter to function"); + msg_warn_task("invalid parameter to function"); return FALSE; } - symbol_str = (const gchar *)sym_arg->data; + symbol_str = (const gchar *) sym_arg->data; - if (rspamd_task_find_symbol_result (task, symbol_str, NULL)) { + if (rspamd_task_find_symbol_result(task, symbol_str, NULL)) { return TRUE; } diff --git a/src/libmime/mime_expressions.h b/src/libmime/mime_expressions.h index 935027482..a2ea3fea4 100644 --- a/src/libmime/mime_expressions.h +++ b/src/libmime/mime_expressions.h @@ -10,7 +10,7 @@ #include "expression.h" #include "contrib/libucl/ucl.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -33,13 +33,13 @@ enum rspamd_expression_type { EXPRESSION_ARGUMENT_REGEXP }; struct expression_argument { - enum rspamd_expression_type type; /**< type of argument (text or other function) */ - void *data; /**< pointer to its data */ + enum rspamd_expression_type type; /**< 
type of argument (text or other function) */ + void *data; /**< pointer to its data */ }; -typedef gboolean (*rspamd_internal_func_t) (struct rspamd_task *, - GArray *args, void *user_data); +typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, + GArray *args, void *user_data); /** @@ -47,18 +47,18 @@ typedef gboolean (*rspamd_internal_func_t) (struct rspamd_task *, * @param name name of function * @param func pointer to function */ -void register_expression_function (const gchar *name, - rspamd_internal_func_t func, - void *user_data); +void register_expression_function(const gchar *name, + rspamd_internal_func_t func, + void *user_data); /** * Set global limit of regexp data size to be processed * @param limit new limit in bytes * @return old limit value */ -guint rspamd_mime_expression_set_re_limit (guint limit); +guint rspamd_mime_expression_set_re_limit(guint limit); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c index 422c9b8f8..daba57f0d 100644 --- a/src/libmime/mime_headers.c +++ b/src/libmime/mime_headers.c @@ -24,9 +24,9 @@ #include "libutil/util.h" #include <unicode/utf8.h> -KHASH_INIT (rspamd_mime_headers_htb, gchar *, - struct rspamd_mime_header *, 1, - rspamd_strcase_hash, rspamd_strcase_equal); +KHASH_INIT(rspamd_mime_headers_htb, gchar *, + struct rspamd_mime_header *, 1, + rspamd_strcase_hash, rspamd_strcase_equal); struct rspamd_mime_headers_table { khash_t(rspamd_mime_headers_htb) htb; @@ -34,8 +34,8 @@ struct rspamd_mime_headers_table { }; static void -rspamd_mime_header_check_special (struct rspamd_task *task, - struct rspamd_mime_header *rh) +rspamd_mime_header_check_special(struct rspamd_task *task, + struct rspamd_mime_header *rh) { guint64 h; const gchar *p, *end; @@ -46,46 +46,46 @@ rspamd_mime_header_check_special (struct rspamd_task *task, max_recipients = task->cfg->max_recipients; } - h = rspamd_icase_hash (rh->name, strlen (rh->name), 0xdeadbabe); + h = 
rspamd_icase_hash(rh->name, strlen(rh->name), 0xdeadbabe); switch (h) { - case 0x88705DC4D9D61ABULL: /* received */ - if (rspamd_received_header_parse(task, rh->decoded, strlen (rh->decoded), rh)) { + case 0x88705DC4D9D61ABULL: /* received */ + if (rspamd_received_header_parse(task, rh->decoded, strlen(rh->decoded), rh)) { rh->flags |= RSPAMD_HEADER_RECEIVED; } break; - case 0x76F31A09F4352521ULL: /* to */ - MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool, - rh->value, strlen (rh->value), - MESSAGE_FIELD (task, rcpt_mime), max_recipients); - rh->flags |= RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + case 0x76F31A09F4352521ULL: /* to */ + MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool, + rh->value, strlen(rh->value), + MESSAGE_FIELD(task, rcpt_mime), max_recipients); + rh->flags |= RSPAMD_HEADER_TO | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE; break; - case 0x7EB117C1480B76ULL: /* cc */ - MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool, - rh->value, strlen (rh->value), - MESSAGE_FIELD (task, rcpt_mime), max_recipients); - rh->flags |= RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + case 0x7EB117C1480B76ULL: /* cc */ + MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool, + rh->value, strlen(rh->value), + MESSAGE_FIELD(task, rcpt_mime), max_recipients); + rh->flags |= RSPAMD_HEADER_CC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE; break; - case 0xE4923E11C4989C8DULL: /* bcc */ - MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool, - rh->value, strlen (rh->value), - MESSAGE_FIELD (task, rcpt_mime), max_recipients); - rh->flags |= RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE; + case 0xE4923E11C4989C8DULL: /* bcc */ + MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool, + rh->value, strlen(rh->value), + MESSAGE_FIELD(task, rcpt_mime), max_recipients); 
+ rh->flags |= RSPAMD_HEADER_BCC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE; break; - case 0x41E1985EDC1CBDE4ULL: /* from */ - MESSAGE_FIELD (task, from_mime) = rspamd_email_address_from_mime (task->task_pool, - rh->value, strlen (rh->value), - MESSAGE_FIELD (task, from_mime), max_recipients); - rh->flags |= RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE; + case 0x41E1985EDC1CBDE4ULL: /* from */ + MESSAGE_FIELD(task, from_mime) = rspamd_email_address_from_mime(task->task_pool, + rh->value, strlen(rh->value), + MESSAGE_FIELD(task, from_mime), max_recipients); + rh->flags |= RSPAMD_HEADER_FROM | RSPAMD_HEADER_SENDER | RSPAMD_HEADER_UNIQUE; break; - case 0x43A558FC7C240226ULL: /* message-id */ { + case 0x43A558FC7C240226ULL: /* message-id */ { - rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_MESSAGE_ID | RSPAMD_HEADER_UNIQUE; p = rh->decoded; len = rspamd_strip_smtp_comments_inplace(rh->decoded, strlen(p)); rh->decoded[len] = '\0'; /* Zero terminate after stripping */ /* Strip surrounding spaces */ - rh->decoded = g_strstrip (rh->decoded); + rh->decoded = g_strstrip(rh->decoded); end = p + len; if (*p == '<') { @@ -96,14 +96,14 @@ rspamd_mime_header_check_special (struct rspamd_task *task, gchar *d; if (*(end - 1) == '>') { - end --; + end--; } - id = rspamd_mempool_alloc (task->task_pool, end - p + 1); + id = rspamd_mempool_alloc(task->task_pool, end - p + 1); d = id; while (p < end) { - if (g_ascii_isgraph (*p)) { + if (g_ascii_isgraph(*p)) { *d++ = *p++; } else { @@ -114,32 +114,32 @@ rspamd_mime_header_check_special (struct rspamd_task *task, *d = '\0'; - MESSAGE_FIELD (task, message_id) = id; + MESSAGE_FIELD(task, message_id) = id; } break; } - case 0xB91D3910358E8212ULL: /* subject */ - if (MESSAGE_FIELD (task, subject) == NULL) { - MESSAGE_FIELD (task, subject) = rh->decoded; + case 0xB91D3910358E8212ULL: /* subject */ + if (MESSAGE_FIELD(task, subject) == NULL) { + MESSAGE_FIELD(task, subject) = 
rh->decoded; } - rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_SUBJECT | RSPAMD_HEADER_UNIQUE; break; - case 0xEE4AA2EAAC61D6F4ULL: /* return-path */ + case 0xEE4AA2EAAC61D6F4ULL: /* return-path */ if (task->from_envelope == NULL) { - task->from_envelope = rspamd_email_address_from_smtp (rh->decoded, - strlen (rh->decoded)); + task->from_envelope = rspamd_email_address_from_smtp(rh->decoded, + strlen(rh->decoded)); } - rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE; + rh->flags = RSPAMD_HEADER_RETURN_PATH | RSPAMD_HEADER_UNIQUE; break; - case 0xB9EEFAD2E93C2161ULL: /* delivered-to */ + case 0xB9EEFAD2E93C2161ULL: /* delivered-to */ if (task->deliver_to == NULL) { task->deliver_to = rh->decoded; } rh->flags = RSPAMD_HEADER_DELIVERED_TO; break; case 0x2EC3BFF3C393FC10ULL: /* date */ - case 0xAC0DDB1A1D214CAULL: /* sender */ + case 0xAC0DDB1A1D214CAULL: /* sender */ case 0x54094572367AB695ULL: /* in-reply-to */ case 0x81CD9E9131AB6A9AULL: /* content-type */ case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */ @@ -150,45 +150,44 @@ rspamd_mime_header_check_special (struct rspamd_task *task, } static void -rspamd_mime_header_add (struct rspamd_task *task, - khash_t(rspamd_mime_headers_htb) *target, - struct rspamd_mime_header **order_ptr, - struct rspamd_mime_header *rh, - gboolean check_special) +rspamd_mime_header_add(struct rspamd_task *task, + khash_t(rspamd_mime_headers_htb) * target, + struct rspamd_mime_header **order_ptr, + struct rspamd_mime_header *rh, + gboolean check_special) { khiter_t k; struct rspamd_mime_header *ex; int res; - k = kh_put (rspamd_mime_headers_htb, target, rh->name, &res); + k = kh_put(rspamd_mime_headers_htb, target, rh->name, &res); if (res == 0) { - ex = kh_value (target, k); - DL_APPEND (ex, rh); - msg_debug_task ("append raw header %s: %s", rh->name, rh->value); + ex = kh_value(target, k); + DL_APPEND(ex, rh); + msg_debug_task("append raw header %s: %s", rh->name, rh->value); } 
else { - kh_value (target, k) = rh; + kh_value(target, k) = rh; rh->prev = rh; rh->next = NULL; - msg_debug_task ("add new raw header %s: %s", rh->name, rh->value); + msg_debug_task("add new raw header %s: %s", rh->name, rh->value); } - LL_PREPEND2 (*order_ptr, rh, ord_next); + LL_PREPEND2(*order_ptr, rh, ord_next); if (check_special) { - rspamd_mime_header_check_special (task, rh); + rspamd_mime_header_check_special(task, rh); } } /* Convert raw headers to a list of struct raw_header * */ -void -rspamd_mime_headers_process (struct rspamd_task *task, - struct rspamd_mime_headers_table *target, - struct rspamd_mime_header **order_ptr, - const gchar *in, gsize len, - gboolean check_newlines) +void rspamd_mime_headers_process(struct rspamd_task *task, + struct rspamd_mime_headers_table *target, + struct rspamd_mime_header **order_ptr, + const gchar *in, gsize len, + gboolean check_newlines) { struct rspamd_mime_header *nh = NULL; const gchar *p, *c, *end; @@ -201,15 +200,15 @@ rspamd_mime_headers_process (struct rspamd_task *task, p = in; end = p + len; c = p; - memset (nlines_count, 0, sizeof (nlines_count)); - msg_debug_task ("start processing headers"); + memset(nlines_count, 0, sizeof(nlines_count)); + msg_debug_task("start processing headers"); while (p < end) { /* FSM for processing headers */ switch (state) { case 0: /* Begin processing headers */ - if (!g_ascii_isalpha (*p)) { + if (!g_ascii_isalpha(*p)) { /* We have some garbage at the beginning of headers, skip this line */ state = 100; next_state = 0; @@ -222,11 +221,11 @@ rspamd_mime_headers_process (struct rspamd_task *task, case 1: /* We got something like header's name */ if (*p == ':') { - nh = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_header)); + nh = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_header)); l = p - c; - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_null_safe_copy (c, l, tmp, l + 1); + tmp = 
rspamd_mempool_alloc(task->task_pool, l + 1); + rspamd_null_safe_copy(c, l, tmp, l + 1); nh->name = tmp; nh->flags |= RSPAMD_HEADER_EMPTY_SEPARATOR; nh->raw_value = c; @@ -235,9 +234,9 @@ rspamd_mime_headers_process (struct rspamd_task *task, state = 2; c = p; } - else if (g_ascii_isspace (*p)) { + else if (g_ascii_isspace(*p)) { /* Not header but some garbage */ - if (target == MESSAGE_FIELD (task, raw_headers)) { + if (target == MESSAGE_FIELD(task, raw_headers)) { /* Do not propagate flag from the attachments */ task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } @@ -263,13 +262,13 @@ rspamd_mime_headers_process (struct rspamd_task *task, if (check_newlines) { if (*p == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + nlines_count[RSPAMD_TASK_NEWLINES_LF]++; } else if (p + 1 < end && *(p + 1) == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++; } else { - nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; + nlines_count[RSPAMD_TASK_NEWLINES_CR]++; } } @@ -277,8 +276,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, state = 99; l = p - c; if (l > 0) { - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_null_safe_copy (c, l, tmp, l + 1); + tmp = rspamd_mempool_alloc(task->task_pool, l + 1); + rspamd_null_safe_copy(c, l, tmp, l + 1); nh->separator = tmp; } next_state = 3; @@ -289,8 +288,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, /* Process value */ l = p - c; if (l >= 0) { - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_null_safe_copy (c, l, tmp, l + 1); + tmp = rspamd_mempool_alloc(task->task_pool, l + 1); + rspamd_null_safe_copy(c, l, tmp, l + 1); nh->separator = tmp; } c = p; @@ -302,13 +301,13 @@ rspamd_mime_headers_process (struct rspamd_task *task, /* Hold folding */ if (check_newlines) { if (*p == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + nlines_count[RSPAMD_TASK_NEWLINES_LF]++; } else if (p + 1 < end && *(p + 1) == '\n') { - 
nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++; } else { - nlines_count[RSPAMD_TASK_NEWLINES_CR] ++; + nlines_count[RSPAMD_TASK_NEWLINES_CR]++; } } state = 99; @@ -331,7 +330,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, * strings was extremely poor! */ l = p - c; - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + tmp = rspamd_mempool_alloc(task->task_pool, l + 1); tp = tmp; t_state = 0; while (l--) { @@ -353,7 +352,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, } else if (t_state == 1) { /* Inside folding */ - if (g_ascii_isspace (*c)) { + if (g_ascii_isspace(*c)) { c++; } else { @@ -374,8 +373,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, *tp = '\0'; /* Strip the initial spaces that could also be added by folding */ - while (*tmp != '\0' && g_ascii_isspace (*tmp)) { - tmp ++; + while (*tmp != '\0' && g_ascii_isspace(*tmp)) { + tmp++; } if (p + 1 == end) { @@ -389,8 +388,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, gboolean broken_utf = FALSE; - nh->decoded = rspamd_mime_header_decode (task->task_pool, - nh->value, strlen (tmp), &broken_utf); + nh->decoded = rspamd_mime_header_decode(task->task_pool, + nh->value, strlen(tmp), &broken_utf); if (broken_utf) { task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE; @@ -398,26 +397,26 @@ rspamd_mime_headers_process (struct rspamd_task *task, if (nh->decoded == NULL) { /* As we strip comments in place... 
*/ - nh->decoded = rspamd_mempool_strdup (task->task_pool, ""); + nh->decoded = rspamd_mempool_strdup(task->task_pool, ""); } /* We also validate utf8 and replace all non-valid utf8 chars */ - rspamd_mime_charset_utf_enforce (nh->decoded, strlen (nh->decoded)); - nh->order = norder ++; - rspamd_mime_header_add (task, &target->htb, order_ptr, nh, check_newlines); + rspamd_mime_charset_utf_enforce(nh->decoded, strlen(nh->decoded)); + nh->order = norder++; + rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines); nh = NULL; state = 0; break; case 5: /* Header has only name, no value */ - nh->value = rspamd_mempool_strdup (task->task_pool, ""); - nh->decoded = rspamd_mempool_strdup (task->task_pool, ""); + nh->value = rspamd_mempool_strdup(task->task_pool, ""); + nh->decoded = rspamd_mempool_strdup(task->task_pool, ""); nh->raw_len = p - nh->raw_value; if (shift_by_one) { - nh->raw_len ++; + nh->raw_len++; } - nh->order = norder ++; - rspamd_mime_header_add (task, &target->htb, order_ptr, nh, check_newlines); + nh->order = norder++; + rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines); nh = NULL; state = 0; break; @@ -440,12 +439,12 @@ rspamd_mime_headers_process (struct rspamd_task *task, } else { if (valid_folding) { - debug_task ("go to state: %d->%d", state, next_state); + debug_task("go to state: %d->%d", state, next_state); state = next_state; } else { /* Fall back */ - debug_task ("go to state: %d->%d", state, err_state); + debug_task("go to state: %d->%d", state, err_state); state = err_state; } } @@ -456,14 +455,14 @@ rspamd_mime_headers_process (struct rspamd_task *task, if (*p == '\r') { if (p + 1 < end && *(p + 1) == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++; + nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++; p++; } p++; state = next_state; } else if (*p == '\n') { - nlines_count[RSPAMD_TASK_NEWLINES_LF] ++; + nlines_count[RSPAMD_TASK_NEWLINES_LF]++; if (p + 1 < end && *(p + 1) == '\r') { p++; @@ -483,7 +482,7 
@@ rspamd_mime_headers_process (struct rspamd_task *task, } /* Since we have prepended headers, we need to reverse the list to get the actual order */ - LL_REVERSE (*order_ptr); + LL_REVERSE(*order_ptr); if (check_newlines) { guint max_cnt = 0; @@ -491,57 +490,58 @@ rspamd_mime_headers_process (struct rspamd_task *task, rspamd_cryptobox_hash_state_t hs; guchar hout[rspamd_cryptobox_HASHBYTES], *hexout; - for (gint i = RSPAMD_TASK_NEWLINES_CR; i < RSPAMD_TASK_NEWLINES_MAX; i ++) { + for (gint i = RSPAMD_TASK_NEWLINES_CR; i < RSPAMD_TASK_NEWLINES_MAX; i++) { if (nlines_count[i] > max_cnt) { max_cnt = nlines_count[i]; sel = i; } } - MESSAGE_FIELD (task, nlines_type) = sel; + MESSAGE_FIELD(task, nlines_type) = sel; - rspamd_cryptobox_hash_init (&hs, NULL, 0); + rspamd_cryptobox_hash_init(&hs, NULL, 0); - LL_FOREACH (*order_ptr, nh) { + LL_FOREACH(*order_ptr, nh) + { if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) { - rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name)); + rspamd_cryptobox_hash_update(&hs, nh->name, strlen(nh->name)); } } - rspamd_cryptobox_hash_final (&hs, hout); - hexout = rspamd_mempool_alloc (task->task_pool, sizeof (hout) * 2 + 1); - hexout[sizeof (hout) * 2] = '\0'; - rspamd_encode_hex_buf (hout, sizeof (hout), hexout, - sizeof (hout) * 2 + 1); - rspamd_mempool_set_variable (task->task_pool, - RSPAMD_MEMPOOL_HEADERS_HASH, - hexout, NULL); + rspamd_cryptobox_hash_final(&hs, hout); + hexout = rspamd_mempool_alloc(task->task_pool, sizeof(hout) * 2 + 1); + hexout[sizeof(hout) * 2] = '\0'; + rspamd_encode_hex_buf(hout, sizeof(hout), hexout, + sizeof(hout) * 2 + 1); + rspamd_mempool_set_variable(task->task_pool, + RSPAMD_MEMPOOL_HEADERS_HASH, + hexout, NULL); } } static void -rspamd_mime_header_maybe_save_token (rspamd_mempool_t *pool, - GString *out, - GByteArray *token, - GByteArray *decoded_token, - rspamd_ftok_t *old_charset, - rspamd_ftok_t *new_charset) +rspamd_mime_header_maybe_save_token(rspamd_mempool_t *pool, + GString *out, + 
GByteArray *token, + GByteArray *decoded_token, + rspamd_ftok_t *old_charset, + rspamd_ftok_t *new_charset) { if (new_charset->len == 0) { - g_assert_not_reached (); + g_assert_not_reached(); } if (old_charset->len > 0) { - if (rspamd_ftok_casecmp (new_charset, old_charset) == 0) { + if (rspamd_ftok_casecmp(new_charset, old_charset) == 0) { rspamd_ftok_t srch; /* * Special case for iso-2022-jp: * https://github.com/vstakhov/rspamd/issues/1669 */ - RSPAMD_FTOK_ASSIGN (&srch, "iso-2022-jp"); + RSPAMD_FTOK_ASSIGN(&srch, "iso-2022-jp"); - if (rspamd_ftok_casecmp (new_charset, &srch) != 0) { + if (rspamd_ftok_casecmp(new_charset, &srch) != 0) { /* We can concatenate buffers, just return */ return; } @@ -549,13 +549,13 @@ rspamd_mime_header_maybe_save_token (rspamd_mempool_t *pool, } /* We need to flush and decode old token to out string */ - if (rspamd_mime_to_utf8_byte_array (token, decoded_token, pool, - rspamd_mime_detect_charset (new_charset, pool))) { - g_string_append_len (out, decoded_token->data, decoded_token->len); + if (rspamd_mime_to_utf8_byte_array(token, decoded_token, pool, + rspamd_mime_detect_charset(new_charset, pool))) { + g_string_append_len(out, decoded_token->data, decoded_token->len); } /* We also reset buffer */ - g_byte_array_set_size (token, 0); + g_byte_array_set_size(token, 0); /* * Propagate charset * @@ -565,19 +565,19 @@ rspamd_mime_header_maybe_save_token (rspamd_mempool_t *pool, * `rspamd_mime_detect_charset` which could be relatively expensive. * But we ignore that for now... 
*/ - memcpy (old_charset, new_charset, sizeof (*old_charset)); + memcpy(old_charset, new_charset, sizeof(*old_charset)); } static void -rspamd_mime_header_sanity_check (GString *str) +rspamd_mime_header_sanity_check(GString *str) { gsize i; gchar t; - for (i = 0; i < str->len; i ++) { + for (i = 0; i < str->len; i++) { t = str->str[i]; - if (!((t & 0x80) || g_ascii_isgraph (t))) { - if (g_ascii_isspace (t)) { + if (!((t & 0x80) || g_ascii_isgraph(t))) { + if (g_ascii_isspace(t)) { /* Replace spaces characters with plain space */ str->str[i] = ' '; } @@ -589,8 +589,8 @@ rspamd_mime_header_sanity_check (GString *str) } gchar * -rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, - gsize inlen, gboolean *invalid_utf) +rspamd_mime_header_decode(rspamd_mempool_t *pool, const gchar *in, + gsize inlen, gboolean *invalid_utf) { GString *out; const guchar *c, *p, *end; @@ -610,20 +610,20 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, skip_spaces, } state = parse_normal; - g_assert (in != NULL); + g_assert(in != NULL); c = in; p = in; end = in + inlen; - out = g_string_sized_new (inlen); - token = g_byte_array_sized_new (80); - decoded = g_byte_array_sized_new (122); + out = g_string_sized_new(inlen); + token = g_byte_array_sized_new(80); + decoded = g_byte_array_sized_new(122); while (p < end) { switch (state) { case parse_normal: if (*p == '=') { - g_string_append_len (out, c, p - c); + g_string_append_len(out, c, p - c); c = p; state = got_eqsign; } @@ -631,17 +631,17 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, gint off = 0; UChar32 uc; /* Unencoded character */ - g_string_append_len (out, c, p - c); + g_string_append_len(out, c, p - c); /* Check if that's valid UTF8 */ - U8_NEXT (p, off, end - p, uc); + U8_NEXT(p, off, end - p, uc); if (uc <= 0) { c = p + 1; /* 0xFFFD in UTF8 */ - g_string_append_len (out, " ", 3); + g_string_append_len(out, " ", 3); off = 0; - U8_APPEND_UNSAFE (out->str + out->len - 3, - 
off, 0xfffd); + U8_APPEND_UNSAFE(out->str + out->len - 3, + off, 0xfffd); if (invalid_utf) { *invalid_utf = TRUE; @@ -653,7 +653,7 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, continue; /* To avoid p ++ after this block */ } } - p ++; + p++; break; case got_eqsign: if (*p == '?') { @@ -661,27 +661,27 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, qmarks = 0; } else { - g_string_append_len (out, c, 1); + g_string_append_len(out, c, 1); c = p; state = parse_normal; continue; /* Deal with == case */ } - p ++; + p++; break; case got_encoded_start: if (*p == '?') { state = got_more_qmark; - qmarks ++; + qmarks++; /* Skip multiple ? signs */ - p ++; + p++; while (p < end && *p == '?') { - p ++; + p++; } continue; } - p ++; + p++; break; case got_more_qmark: if (*p == '=') { @@ -692,42 +692,45 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, /* Finished encoded boundary */ if (*c == '"') { /* Quoted string, non-RFC conformant but used by retards */ - c ++; + c++; } - if (rspamd_rfc2047_parser (c, p - c + 1, &encoding, - &cur_charset.begin, &cur_charset.len, - &tok_start, &tok_len)) { + if (rspamd_rfc2047_parser(c, p - c + 1, &encoding, + &cur_charset.begin, &cur_charset.len, + &tok_start, &tok_len)) { /* We have a token, so we can decode it from `encoding` */ if (token->len > 0) { if (old_charset.len == 0) { - memcpy (&old_charset, &cur_charset, - sizeof (old_charset)); + memcpy(&old_charset, &cur_charset, + sizeof(old_charset)); } - rspamd_mime_header_maybe_save_token (pool, out, - token, decoded, - &old_charset, &cur_charset); + rspamd_mime_header_maybe_save_token(pool, out, + token, decoded, + &old_charset, &cur_charset); } qmarks = 0; pos = token->len; - g_byte_array_set_size (token, pos + tok_len); + g_byte_array_set_size(token, pos + tok_len); if (encoding == RSPAMD_RFC2047_QP) { - r = rspamd_decode_qp2047_buf (tok_start, tok_len, - token->data + pos, tok_len); + r = 
rspamd_decode_qp2047_buf(tok_start, tok_len, + token->data + pos, tok_len); if (r != -1) { token->len = pos + r; - } else { + } + else { /* Cannot decode qp */ token->len -= tok_len; } - } else { - if (rspamd_cryptobox_base64_decode (tok_start, tok_len, - token->data + pos, &tok_len)) { + } + else { + if (rspamd_cryptobox_base64_decode(tok_start, tok_len, + token->data + pos, &tok_len)) { token->len = pos + tok_len; - } else { + } + else { /* Cannot decode */ token->len -= tok_len; } @@ -735,30 +738,31 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, c = p + 1; state = skip_spaces; - } else { + } + else { /* Not encoded-word */ old_charset.len = 0; if (token->len > 0) { - rspamd_mime_header_maybe_save_token (pool, out, - token, decoded, - &old_charset, &cur_charset); + rspamd_mime_header_maybe_save_token(pool, out, + token, decoded, + &old_charset, &cur_charset); } - g_string_append_len (out, c, p - c); + g_string_append_len(out, c, p - c); c = p; state = parse_normal; } } /* qmarks >= 3 */ - } /* p == '=' */ + } /* p == '=' */ else { state = got_encoded_start; } - p ++; + p++; break; case skip_spaces: - if (g_ascii_isspace (*p)) { - p ++; + if (g_ascii_isspace(*p)) { + p++; } else if (*p == '=' && p < end - 1 && p[1] == '?') { /* Next boundary, can glue */ @@ -770,12 +774,12 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, /* Need to save spaces and decoded token */ if (token->len > 0) { old_charset.len = 0; - rspamd_mime_header_maybe_save_token (pool, out, - token, decoded, - &old_charset, &cur_charset); + rspamd_mime_header_maybe_save_token(pool, out, + token, decoded, + &old_charset, &cur_charset); } - g_string_append_len (out, c, p - c); + g_string_append_len(out, c, p - c); c = p; state = parse_normal; } @@ -788,49 +792,49 @@ rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, case skip_spaces: if (token->len > 0 && cur_charset.len > 0) { old_charset.len = 0; - rspamd_mime_header_maybe_save_token 
(pool, out, - token, decoded, - &old_charset, &cur_charset); + rspamd_mime_header_maybe_save_token(pool, out, + token, decoded, + &old_charset, &cur_charset); } break; default: /* Just copy leftover */ if (p > c) { - g_string_append_len (out, c, p - c); + g_string_append_len(out, c, p - c); } break; } - g_byte_array_free (token, TRUE); - g_byte_array_free (decoded, TRUE); - rspamd_mime_header_sanity_check (out); - rspamd_mempool_notify_alloc (pool, out->len); - ret = g_string_free (out, FALSE); - rspamd_mempool_add_destructor (pool, g_free, ret); + g_byte_array_free(token, TRUE); + g_byte_array_free(decoded, TRUE); + rspamd_mime_header_sanity_check(out); + rspamd_mempool_notify_alloc(pool, out->len); + ret = g_string_free(out, FALSE); + rspamd_mempool_add_destructor(pool, g_free, ret); return ret; } gchar * -rspamd_mime_header_encode (const gchar *in, gsize len) +rspamd_mime_header_encode(const gchar *in, gsize len) { const gchar *p = in, *end = in + len; - gchar *out, encode_buf[80 * sizeof (guint32)]; + gchar *out, encode_buf[80 * sizeof(guint32)]; GString *res; gboolean need_encoding = FALSE; /* Check if we need to encode */ while (p < end) { - if ((((guchar)*p) & 0x80) != 0) { + if ((((guchar) *p) & 0x80) != 0) { need_encoding = TRUE; break; } - p ++; + p++; } if (!need_encoding) { - out = g_malloc (len + 1); - rspamd_strlcpy (out, in, len + 1); + out = g_malloc(len + 1); + rspamd_strlcpy(out, in, len + 1); } else { /* Need encode */ @@ -840,82 +844,82 @@ rspamd_mime_header_encode (const gchar *in, gsize len) /* Choose step: =?UTF-8?Q?<qp>?= should be less than 76 chars */ guint step = (76 - 12) / 3 + 1; - ulen = g_utf8_strlen (in, len); - res = g_string_sized_new (len * 2 + 1); + ulen = g_utf8_strlen(in, len); + res = g_string_sized_new(len * 2 + 1); pos = 0; prev = in; /* Adjust chunk size for unicode average length */ - step *= 1.0 * ulen / (gdouble)len; + step *= 1.0 * ulen / (gdouble) len; while (pos < ulen) { - p = g_utf8_offset_to_pointer (in, pos); + p 
= g_utf8_offset_to_pointer(in, pos); if (p > prev) { /* Encode and print */ - r = rspamd_encode_qp2047_buf (prev, p - prev, - encode_buf, sizeof (encode_buf)); + r = rspamd_encode_qp2047_buf(prev, p - prev, + encode_buf, sizeof(encode_buf)); if (r != -1) { if (res->len > 0) { - rspamd_printf_gstring (res, " =?UTF-8?Q?%*s?=", r, - encode_buf); + rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", r, + encode_buf); } else { - rspamd_printf_gstring (res, "=?UTF-8?Q?%*s?=", r, - encode_buf); + rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", r, + encode_buf); } } } - pos += MIN (step, ulen - pos); + pos += MIN(step, ulen - pos); prev = p; } /* Leftover */ if (prev < end) { - r = rspamd_encode_qp2047_buf (prev, end - prev, - encode_buf, sizeof (encode_buf)); + r = rspamd_encode_qp2047_buf(prev, end - prev, + encode_buf, sizeof(encode_buf)); if (r != -1) { if (res->len > 0) { - rspamd_printf_gstring (res, " =?UTF-8?Q?%*s?=", r, - encode_buf); + rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", r, + encode_buf); } else { - rspamd_printf_gstring (res, "=?UTF-8?Q?%*s?=", r, - encode_buf); + rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", r, + encode_buf); } } } - out = g_string_free (res, FALSE); + out = g_string_free(res, FALSE); } return out; } gchar * -rspamd_mime_message_id_generate (const gchar *fqdn) +rspamd_mime_message_id_generate(const gchar *fqdn) { GString *out; guint64 rnd, clk; - out = g_string_sized_new (strlen (fqdn) + 22); - rnd = ottery_rand_uint64 (); - clk = rspamd_get_calendar_ticks () * 1e6; + out = g_string_sized_new(strlen(fqdn) + 22); + rnd = ottery_rand_uint64(); + clk = rspamd_get_calendar_ticks() * 1e6; - rspamd_printf_gstring (out, "%*bs.%*bs@%s", - (gint)sizeof (guint64) - 3, (guchar *)&clk, - (gint)sizeof (guint64), (gchar *)&rnd, - fqdn); + rspamd_printf_gstring(out, "%*bs.%*bs@%s", + (gint) sizeof(guint64) - 3, (guchar *) &clk, + (gint) sizeof(guint64), (gchar *) &rnd, + fqdn); - return g_string_free (out, FALSE); + return g_string_free(out, FALSE); } 
struct rspamd_mime_header * -rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs, - const gchar *field, - gboolean need_modified) +rspamd_message_get_header_from_hash(struct rspamd_mime_headers_table *hdrs, + const gchar *field, + gboolean need_modified) { if (hdrs == NULL) { return NULL; @@ -926,13 +930,13 @@ rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs, struct rspamd_mime_header *hdr; if (htb) { - k = kh_get (rspamd_mime_headers_htb, htb, (gchar *) field); + k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) field); - if (k == kh_end (htb)) { + if (k == kh_end(htb)) { return NULL; } - hdr = kh_value (htb, k); + hdr = kh_value(htb, k); if (!need_modified) { if (hdr->flags & RSPAMD_HEADER_NON_EXISTING) { @@ -954,26 +958,24 @@ rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs, } struct rspamd_mime_header * -rspamd_message_get_header_array (struct rspamd_task *task, const gchar *field, - gboolean need_modified) +rspamd_message_get_header_array(struct rspamd_task *task, const gchar *field, + gboolean need_modified) { return rspamd_message_get_header_from_hash( - MESSAGE_FIELD_CHECK (task, raw_headers), - field, need_modified); + MESSAGE_FIELD_CHECK(task, raw_headers), + field, need_modified); } -gsize -rspamd_mime_headers_count (struct rspamd_mime_headers_table *hdrs) +gsize rspamd_mime_headers_count(struct rspamd_mime_headers_table *hdrs) { if (hdrs) { - return kh_size (&hdrs->htb); + return kh_size(&hdrs->htb); } return 0; } -bool -rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs, +bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs, rspamd_hdr_traverse_func_t func, void *ud) { const gchar *name; @@ -989,46 +991,44 @@ rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs, } static void -rspamd_message_headers_dtor (struct rspamd_mime_headers_table *hdrs) +rspamd_message_headers_dtor(struct rspamd_mime_headers_table 
*hdrs) { if (hdrs) { - kfree (hdrs->htb.keys); - kfree (hdrs->htb.vals); - kfree (hdrs->htb.flags); - g_free (hdrs); + kfree(hdrs->htb.keys); + kfree(hdrs->htb.vals); + kfree(hdrs->htb.flags); + g_free(hdrs); } } struct rspamd_mime_headers_table * -rspamd_message_headers_ref (struct rspamd_mime_headers_table *hdrs) +rspamd_message_headers_ref(struct rspamd_mime_headers_table *hdrs) { - REF_RETAIN (hdrs); + REF_RETAIN(hdrs); return hdrs; } -void -rspamd_message_headers_unref (struct rspamd_mime_headers_table *hdrs) +void rspamd_message_headers_unref(struct rspamd_mime_headers_table *hdrs) { - REF_RELEASE (hdrs); + REF_RELEASE(hdrs); } struct rspamd_mime_headers_table * -rspamd_message_headers_new (void) +rspamd_message_headers_new(void) { struct rspamd_mime_headers_table *nhdrs; - nhdrs = g_malloc0 (sizeof (*nhdrs)); - REF_INIT_RETAIN (nhdrs, rspamd_message_headers_dtor); + nhdrs = g_malloc0(sizeof(*nhdrs)); + REF_INIT_RETAIN(nhdrs, rspamd_message_headers_dtor); return nhdrs; } -void -rspamd_message_set_modified_header (struct rspamd_task *task, - struct rspamd_mime_headers_table *hdrs, - const gchar *hdr_name, - const ucl_object_t *obj) +void rspamd_message_set_modified_header(struct rspamd_task *task, + struct rspamd_mime_headers_table *hdrs, + const gchar *hdr_name, + const ucl_object_t *obj) { khiter_t k; khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb; @@ -1036,26 +1036,26 @@ rspamd_message_set_modified_header (struct rspamd_task *task, int i; if (htb) { - k = kh_get (rspamd_mime_headers_htb, htb, (gchar *)hdr_name); + k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) hdr_name); - if (k == kh_end (htb)) { - hdr_elt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*hdr_elt)); + if (k == kh_end(htb)) { + hdr_elt = rspamd_mempool_alloc0(task->task_pool, sizeof(*hdr_elt)); - hdr_elt->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_NON_EXISTING; - hdr_elt->name = rspamd_mempool_strdup (task->task_pool, hdr_name); + hdr_elt->flags |= RSPAMD_HEADER_MODIFIED | 
RSPAMD_HEADER_NON_EXISTING; + hdr_elt->name = rspamd_mempool_strdup(task->task_pool, hdr_name); int r; - k = kh_put (rspamd_mime_headers_htb, htb, hdr_elt->name, &r); + k = kh_put(rspamd_mime_headers_htb, htb, hdr_elt->name, &r); - kh_value (htb, k) = hdr_elt; + kh_value(htb, k) = hdr_elt; } else { - hdr_elt = kh_value (htb, k); + hdr_elt = kh_value(htb, k); } } else { /* No hash, no modification */ - msg_err_task ("internal error: calling for set_modified_header for no headers"); + msg_err_task("internal error: calling for set_modified_header for no headers"); return; } @@ -1070,49 +1070,51 @@ rspamd_message_set_modified_header (struct rspamd_task *task, ucl_object_iter_t it; /* First, deal with removed headers, copying the relevant headers with remove flag */ - elt = ucl_object_lookup (obj, "remove"); + elt = ucl_object_lookup(obj, "remove"); /* * remove: {1, 2 ...} * where number is the header's position starting from '1' */ - if (elt && ucl_object_type (elt) == UCL_ARRAY) { + if (elt && ucl_object_type(elt) == UCL_ARRAY) { /* First, use a temporary array to keep all headers */ - GPtrArray *existing_ar = g_ptr_array_new (); + GPtrArray *existing_ar = g_ptr_array_new(); struct rspamd_mime_header *cur_hdr; /* Exclude removed headers */ - LL_FOREACH (existing_chain, cur_hdr) { + LL_FOREACH(existing_chain, cur_hdr) + { if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) { - g_ptr_array_add (existing_ar, cur_hdr); + g_ptr_array_add(existing_ar, cur_hdr); } } it = NULL; - while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) { - if (ucl_object_type (cur) == UCL_INT) { - int ord = ucl_object_toint (cur); + while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_INT) { + int ord = ucl_object_toint(cur); if (ord == 0) { /* Remove all headers in the existing chain */ - PTR_ARRAY_FOREACH (existing_ar, i, cur_hdr) { - cur_hdr->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_REMOVED; + PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr) + { + 
cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED; } } else if (ord > 0) { /* Start from the top */ if (ord <= existing_ar->len) { - cur_hdr = g_ptr_array_index (existing_ar, ord - 1); - cur_hdr->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_REMOVED; + cur_hdr = g_ptr_array_index(existing_ar, ord - 1); + cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED; } } else { /* Start from the bottom; ord < 0 */ if ((-ord) <= existing_ar->len) { - cur_hdr = g_ptr_array_index (existing_ar, existing_ar->len + ord); - cur_hdr->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_REMOVED; + cur_hdr = g_ptr_array_index(existing_ar, existing_ar->len + ord); + cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED; } } } @@ -1127,36 +1129,37 @@ rspamd_message_set_modified_header (struct rspamd_task *task, hdr_elt->modified_chain = NULL; gint new_chain_length = 0; - PTR_ARRAY_FOREACH (existing_ar, i, cur_hdr) { + PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr) + { if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) { - struct rspamd_mime_header *nhdr = rspamd_mempool_alloc ( - task->task_pool, sizeof (*nhdr)); - memcpy (nhdr, cur_hdr, sizeof (*nhdr)); + struct rspamd_mime_header *nhdr = rspamd_mempool_alloc( + task->task_pool, sizeof(*nhdr)); + memcpy(nhdr, cur_hdr, sizeof(*nhdr)); nhdr->modified_chain = NULL; nhdr->prev = NULL; nhdr->next = NULL; nhdr->ord_next = NULL; - DL_APPEND (hdr_elt->modified_chain, nhdr); - new_chain_length ++; + DL_APPEND(hdr_elt->modified_chain, nhdr); + new_chain_length++; } } - g_ptr_array_free (existing_ar, TRUE); + g_ptr_array_free(existing_ar, TRUE); /* End of headers removal logic */ } /* We can now deal with headers additions */ - elt = ucl_object_lookup (obj, "add"); - if (elt && ucl_object_type (elt) == UCL_ARRAY) { + elt = ucl_object_lookup(obj, "add"); + if (elt && ucl_object_type(elt) == UCL_ARRAY) { if (!(hdr_elt->flags & RSPAMD_HEADER_MODIFIED)) { /* Copy the header itself to the modified chain */ struct 
rspamd_mime_header *nhdr; hdr_elt->flags |= RSPAMD_HEADER_MODIFIED; - nhdr = rspamd_mempool_alloc ( - task->task_pool, sizeof (*nhdr)); - memcpy (nhdr, hdr_elt, sizeof (*hdr_elt)); + nhdr = rspamd_mempool_alloc( + task->task_pool, sizeof(*nhdr)); + memcpy(nhdr, hdr_elt, sizeof(*hdr_elt)); nhdr->modified_chain = NULL; nhdr->next = NULL; nhdr->ord_next = NULL; @@ -1170,49 +1173,49 @@ rspamd_message_set_modified_header (struct rspamd_task *task, */ it = NULL; - while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) { - if (ucl_object_type (cur) == UCL_ARRAY) { - const ucl_object_t *order = ucl_array_find_index (cur, 0), - *value = ucl_array_find_index (cur, 1); + while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) { + if (ucl_object_type(cur) == UCL_ARRAY) { + const ucl_object_t *order = ucl_array_find_index(cur, 0), + *value = ucl_array_find_index(cur, 1); if (order && value && - (ucl_object_type (order) == UCL_INT && - ucl_object_type (value) == UCL_STRING)) { - int ord = ucl_object_toint (order); + (ucl_object_type(order) == UCL_INT && + ucl_object_type(value) == UCL_STRING)) { + int ord = ucl_object_toint(order); const char *raw_value; gsize raw_len; - raw_value = ucl_object_tolstring (value, &raw_len); + raw_value = ucl_object_tolstring(value, &raw_len); if (raw_len == 0) { continue; } - struct rspamd_mime_header *nhdr = rspamd_mempool_alloc0 ( - task->task_pool, sizeof (*nhdr)); + struct rspamd_mime_header *nhdr = rspamd_mempool_alloc0( + task->task_pool, sizeof(*nhdr)); nhdr->flags |= RSPAMD_HEADER_ADDED; nhdr->name = hdr_elt->name; - nhdr->value = rspamd_mempool_alloc (task->task_pool, - raw_len + 1); - nhdr->raw_len = rspamd_strlcpy (nhdr->value, raw_value, - raw_len + 1); + nhdr->value = rspamd_mempool_alloc(task->task_pool, + raw_len + 1); + nhdr->raw_len = rspamd_strlcpy(nhdr->value, raw_value, + raw_len + 1); nhdr->raw_value = nhdr->value; - nhdr->decoded = rspamd_mime_header_decode (task->task_pool, - raw_value, raw_len, NULL); + 
nhdr->decoded = rspamd_mime_header_decode(task->task_pool, + raw_value, raw_len, NULL); /* Now find a position to insert a value */ struct rspamd_mime_header **pos = &hdr_elt->modified_chain; if (ord == 0) { - DL_PREPEND (hdr_elt->modified_chain, nhdr); + DL_PREPEND(hdr_elt->modified_chain, nhdr); } else if (ord == -1) { - DL_APPEND (hdr_elt->modified_chain, nhdr); + DL_APPEND(hdr_elt->modified_chain, nhdr); } else if (ord > 0) { while (ord > 0 && (*pos)) { - ord --; + ord--; pos = &((*pos)->next); } if (*pos) { @@ -1224,26 +1227,25 @@ rspamd_message_set_modified_header (struct rspamd_task *task, } else { /* Last element */ - DL_APPEND (*pos, nhdr); + DL_APPEND(*pos, nhdr); } } else { /* NYI: negative order is not defined */ - msg_err_task ("internal error: calling for set_modified_header " - "with negative add order header"); + msg_err_task("internal error: calling for set_modified_header " + "with negative add order header"); } } else { - msg_err_task ("internal error: calling for set_modified_header " - "with invalid header"); + msg_err_task("internal error: calling for set_modified_header " + "with invalid header"); } } } } } -gsize -rspamd_strip_smtp_comments_inplace (gchar *input, gsize len) +gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len) { enum parser_state { parse_normal, @@ -1251,7 +1253,8 @@ rspamd_strip_smtp_comments_inplace (gchar *input, gsize len) parse_comment, parse_quoted_copy, parse_quoted_ignore, - } state = parse_normal, next_state = parse_normal; + } state = parse_normal, + next_state = parse_normal; gchar *d = input, *end = input + len, *start = input; gchar t; int obraces = 0, ebraces = 0; @@ -1270,15 +1273,15 @@ rspamd_strip_smtp_comments_inplace (gchar *input, gsize len) else { *d++ = t; } - input ++; + input++; break; case parse_obrace: - obraces ++; + obraces++; if (t == '(') { - obraces ++; + obraces++; } else if (t == ')') { - ebraces ++; + ebraces++; if (obraces == ebraces) { obraces = 0; @@ -1293,14 +1296,14 @@ 
rspamd_strip_smtp_comments_inplace (gchar *input, gsize len) else { state = parse_comment; } - input ++; + input++; break; case parse_comment: if (t == '(') { state = parse_obrace; } else if (t == ')') { - ebraces ++; + ebraces++; if (obraces == ebraces) { obraces = 0; @@ -1312,16 +1315,16 @@ rspamd_strip_smtp_comments_inplace (gchar *input, gsize len) state = parse_quoted_ignore; next_state = parse_comment; } - input ++; + input++; break; case parse_quoted_copy: *d++ = t; state = next_state; - input ++; + input++; break; case parse_quoted_ignore: state = next_state; - input ++; + input++; break; } } diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h index 43dd26e87..7e3cf8420 100644 --- a/src/libmime/mime_headers.h +++ b/src/libmime/mime_headers.h @@ -22,7 +22,7 @@ #include "khash.h" #include "contrib/libucl/ucl.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -49,9 +49,9 @@ enum rspamd_mime_header_flags { RSPAMD_HEADER_UNIQUE = 1u << 12u, RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u, RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u, - RSPAMD_HEADER_MODIFIED = 1u << 15u, /* Means we need to check modified chain */ - RSPAMD_HEADER_ADDED = 1u << 16u, /* A header has been artificially added */ - RSPAMD_HEADER_REMOVED = 1u << 17u, /* A header has been artificially removed */ + RSPAMD_HEADER_MODIFIED = 1u << 15u, /* Means we need to check modified chain */ + RSPAMD_HEADER_ADDED = 1u << 16u, /* A header has been artificially added */ + RSPAMD_HEADER_REMOVED = 1u << 17u, /* A header has been artificially removed */ RSPAMD_HEADER_NON_EXISTING = 1u << 18u, /* Header was not in the original message */ }; @@ -66,8 +66,8 @@ struct rspamd_mime_header { gchar *separator; gchar *decoded; struct rspamd_mime_header *modified_chain; /* Headers modified during transform */ - struct rspamd_mime_header *prev, *next; /* Headers with the same name */ - struct rspamd_mime_header *ord_next; /* Overall order of headers, slist */ + struct rspamd_mime_header *prev, 
*next; /* Headers with the same name */ + struct rspamd_mime_header *ord_next; /* Overall order of headers, slist */ }; struct rspamd_mime_headers_table; @@ -80,11 +80,11 @@ struct rspamd_mime_headers_table; * @param len * @param check_newlines */ -void rspamd_mime_headers_process (struct rspamd_task *task, - struct rspamd_mime_headers_table *target, - struct rspamd_mime_header **order_ptr, - const gchar *in, gsize len, - gboolean check_newlines); +void rspamd_mime_headers_process(struct rspamd_task *task, + struct rspamd_mime_headers_table *target, + struct rspamd_mime_header **order_ptr, + const gchar *in, gsize len, + gboolean check_newlines); /** * Perform rfc2047 decoding of a header @@ -93,8 +93,8 @@ void rspamd_mime_headers_process (struct rspamd_task *task, * @param inlen * @return */ -gchar *rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, - gsize inlen, gboolean *invalid_utf); +gchar *rspamd_mime_header_decode(rspamd_mempool_t *pool, const gchar *in, + gsize inlen, gboolean *invalid_utf); /** * Encode mime header if needed @@ -102,14 +102,14 @@ gchar *rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in, * @param len * @return newly allocated encoded header */ -gchar *rspamd_mime_header_encode (const gchar *in, gsize len); +gchar *rspamd_mime_header_encode(const gchar *in, gsize len); /** * Generate new unique message id * @param fqdn * @return */ -gchar *rspamd_mime_message_id_generate (const gchar *fqdn); +gchar *rspamd_mime_message_id_generate(const gchar *fqdn); /** * Get an array of header's values with specified header's name using raw headers @@ -118,7 +118,7 @@ gchar *rspamd_mime_message_id_generate (const gchar *fqdn); * @return An array of header's values or NULL. It is NOT permitted to free array or values. 
*/ struct rspamd_mime_header * -rspamd_message_get_header_array (struct rspamd_task *task, +rspamd_message_get_header_array(struct rspamd_task *task, const gchar *field, gboolean need_modified); @@ -129,9 +129,9 @@ rspamd_message_get_header_array (struct rspamd_task *task, * @return An array of header's values or NULL. It is NOT permitted to free array or values. */ struct rspamd_mime_header * -rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs, - const gchar *field, - gboolean need_modified); +rspamd_message_get_header_from_hash(struct rspamd_mime_headers_table *hdrs, + const gchar *field, + gboolean need_modified); /** * Modifies a header (or insert one if not found) @@ -140,32 +140,31 @@ rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs, * @param obj an array of modified values * */ -void -rspamd_message_set_modified_header (struct rspamd_task *task, - struct rspamd_mime_headers_table *hdrs, - const gchar *hdr_name, - const ucl_object_t *obj); +void rspamd_message_set_modified_header(struct rspamd_task *task, + struct rspamd_mime_headers_table *hdrs, + const gchar *hdr_name, + const ucl_object_t *obj); /** * Cleans up hash table of the headers * @param htb */ -void rspamd_message_headers_unref (struct rspamd_mime_headers_table *hdrs); +void rspamd_message_headers_unref(struct rspamd_mime_headers_table *hdrs); -struct rspamd_mime_headers_table * rspamd_message_headers_ref (struct rspamd_mime_headers_table *hdrs); +struct rspamd_mime_headers_table *rspamd_message_headers_ref(struct rspamd_mime_headers_table *hdrs); /** * Init headers hash * @return */ -struct rspamd_mime_headers_table* rspamd_message_headers_new (void); +struct rspamd_mime_headers_table *rspamd_message_headers_new(void); /** * Returns size for a headers table * @param hdrs * @return */ -gsize rspamd_mime_headers_count (struct rspamd_mime_headers_table *hdrs); +gsize rspamd_mime_headers_count(struct rspamd_mime_headers_table *hdrs); typedef 
bool(rspamd_hdr_traverse_func_t)(const gchar *, const struct rspamd_mime_header *, void *); /** @@ -175,7 +174,7 @@ typedef bool(rspamd_hdr_traverse_func_t)(const gchar *, const struct rspamd_mime * @return */ bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *, - rspamd_hdr_traverse_func_t func, void *ud); + rspamd_hdr_traverse_func_t func, void *ud); /** * Strip rfc822 CFWS sequences from a string in place @@ -183,9 +182,9 @@ bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *, * @param len length of the input * @return new length of the input */ -gsize rspamd_strip_smtp_comments_inplace (gchar *input, gsize len); +gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 48b946d8f..217f0b87d 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -38,10 +38,10 @@ struct rspamd_mime_parser_lib_ctx *lib_ctx = NULL; static const guint max_nested = 64; static const guint max_key_usages = 10000; -#define msg_debug_mime(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ - rspamd_mime_log_id, "mime", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_debug_mime(...) 
rspamd_conditional_debug_fast(NULL, task->from_addr, \ + rspamd_mime_log_id, "mime", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(mime) @@ -57,7 +57,7 @@ struct rspamd_mime_boundary { }; struct rspamd_mime_parser_ctx { - GPtrArray *stack; /* Stack of parts */ + GPtrArray *stack; /* Stack of parts */ GArray *boundaries; /* Boundaries found in the whole message */ const gchar *start; const gchar *pos; @@ -67,40 +67,40 @@ struct rspamd_mime_parser_ctx { }; static enum rspamd_mime_parse_error -rspamd_mime_parse_multipart_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err); +rspamd_mime_parse_multipart_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err); static enum rspamd_mime_parse_error -rspamd_mime_parse_message (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err); +rspamd_mime_parse_message(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err); static enum rspamd_mime_parse_error -rspamd_mime_parse_normal_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - struct rspamd_content_type *ct, - GError **err); +rspamd_mime_parse_normal_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + struct rspamd_content_type *ct, + GError **err); static enum rspamd_mime_parse_error -rspamd_mime_process_multipart_node (struct rspamd_task *task, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_part *multipart, - const gchar *start, const gchar *end, - gboolean is_finished, - GError **err); +rspamd_mime_process_multipart_node(struct rspamd_task *task, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_part *multipart, + const gchar *start, const gchar *end, + gboolean is_finished, + GError 
**err); #define RSPAMD_MIME_QUARK (rspamd_mime_parser_quark()) static GQuark -rspamd_mime_parser_quark (void) +rspamd_mime_parser_quark(void) { - return g_quark_from_static_string ("mime-parser"); + return g_quark_from_static_string("mime-parser"); } -const gchar* -rspamd_cte_to_string (enum rspamd_cte ct) +const gchar * +rspamd_cte_to_string(enum rspamd_cte ct) { const gchar *ret = "unknown"; @@ -128,31 +128,31 @@ rspamd_cte_to_string (enum rspamd_cte ct) } enum rspamd_cte -rspamd_cte_from_string (const gchar *str) +rspamd_cte_from_string(const gchar *str) { enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN; - g_assert (str != NULL); + g_assert(str != NULL); - if (strcmp (str, "7bit") == 0) { + if (strcmp(str, "7bit") == 0) { ret = RSPAMD_CTE_7BIT; } - else if (strcmp (str, "8bit") == 0) { + else if (strcmp(str, "8bit") == 0) { ret = RSPAMD_CTE_8BIT; } - else if (strcmp (str, "quoted-printable") == 0) { + else if (strcmp(str, "quoted-printable") == 0) { ret = RSPAMD_CTE_QP; } - else if (strcmp (str, "base64") == 0) { + else if (strcmp(str, "base64") == 0) { ret = RSPAMD_CTE_B64; } - else if (strcmp (str, "X-uuencode") == 0) { + else if (strcmp(str, "X-uuencode") == 0) { ret = RSPAMD_CTE_UUE; } - else if (strcmp (str, "uuencode") == 0) { + else if (strcmp(str, "uuencode") == 0) { ret = RSPAMD_CTE_UUE; } - else if (strcmp (str, "X-uue") == 0) { + else if (strcmp(str, "X-uue") == 0) { ret = RSPAMD_CTE_UUE; } @@ -160,32 +160,32 @@ rspamd_cte_from_string (const gchar *str) } static void -rspamd_mime_parser_init_lib (void) +rspamd_mime_parser_init_lib(void) { - lib_ctx = g_malloc0 (sizeof (*lib_ctx)); - lib_ctx->mp_boundary = rspamd_multipattern_create (RSPAMD_MULTIPATTERN_DEFAULT); - g_assert (lib_ctx->mp_boundary != NULL); - rspamd_multipattern_add_pattern (lib_ctx->mp_boundary, "\r--", 0); - rspamd_multipattern_add_pattern (lib_ctx->mp_boundary, "\n--", 0); + lib_ctx = g_malloc0(sizeof(*lib_ctx)); + lib_ctx->mp_boundary = 
rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT); + g_assert(lib_ctx->mp_boundary != NULL); + rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\r--", 0); + rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\n--", 0); GError *err = NULL; - if (!rspamd_multipattern_compile (lib_ctx->mp_boundary, &err)) { - msg_err ("fatal error: cannot compile multipattern for mime parser boundaries: %e", err); - g_error_free (err); + if (!rspamd_multipattern_compile(lib_ctx->mp_boundary, &err)) { + msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err); + g_error_free(err); g_abort(); } - ottery_rand_bytes (lib_ctx->hkey, sizeof (lib_ctx->hkey)); + ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); } static enum rspamd_cte -rspamd_mime_parse_cte (const gchar *in, gsize len) +rspamd_mime_parse_cte(const gchar *in, gsize len) { guint64 h; enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN; - in = rspamd_string_len_strip (in, &len, " \t;,.+-#!`~'"); - h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, - in, len, 0xdeadbabe); + in = rspamd_string_len_strip(in, &len, " \t;,.+-#!`~'"); + h = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64, + in, len, 0xdeadbabe); switch (h) { case 0xCEDAA7056B4753F7ULL: /* 7bit */ @@ -213,65 +213,65 @@ rspamd_mime_parse_cte (const gchar *in, gsize len) } static enum rspamd_cte -rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, - struct rspamd_mime_part *part) +rspamd_mime_part_get_cte_heuristic(struct rspamd_task *task, + struct rspamd_mime_part *part) { const guint check_len = 128; guint real_len, nspaces = 0, neqsign = 0, n8bit = 0, nqpencoded = 0, - padeqsign = 0, nupper = 0, nlower = 0; + padeqsign = 0, nupper = 0, nlower = 0; gboolean b64_chars = TRUE; const guchar *p, *end; enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN; - real_len = MIN (check_len, part->raw_data.len); - p = (const guchar *)part->raw_data.begin; + real_len = MIN(check_len, part->raw_data.len); + p = 
(const guchar *) part->raw_data.begin; end = p + part->raw_data.len; - while (p < end && g_ascii_isspace (*p)) { - p ++; + while (p < end && g_ascii_isspace(*p)) { + p++; } - if (end - p > sizeof ("begin-base64 ")) { + if (end - p > sizeof("begin-base64 ")) { const guchar *uue_start; - if (memcmp (p, "begin ", sizeof ("begin ") - 1) == 0) { - uue_start = p + sizeof ("begin ") - 1; + if (memcmp(p, "begin ", sizeof("begin ") - 1) == 0) { + uue_start = p + sizeof("begin ") - 1; - while (uue_start < end && g_ascii_isspace (*uue_start)) { - uue_start ++; + while (uue_start < end && g_ascii_isspace(*uue_start)) { + uue_start++; } - if (uue_start < end && g_ascii_isdigit (*uue_start)) { + if (uue_start < end && g_ascii_isdigit(*uue_start)) { return RSPAMD_CTE_UUE; } } - else if (memcmp (p, "begin-base64 ", sizeof ("begin-base64 ") - 1) == 0) { - uue_start = p + sizeof ("begin ") - 1; + else if (memcmp(p, "begin-base64 ", sizeof("begin-base64 ") - 1) == 0) { + uue_start = p + sizeof("begin ") - 1; - while (uue_start < end && g_ascii_isspace (*uue_start)) { - uue_start ++; + while (uue_start < end && g_ascii_isspace(*uue_start)) { + uue_start++; } - if (uue_start < end && g_ascii_isdigit (*uue_start)) { + if (uue_start < end && g_ascii_isdigit(*uue_start)) { return RSPAMD_CTE_UUE; } } } /* Skip trailing spaces */ - while (end > p && g_ascii_isspace (*(end - 1))) { - end --; + while (end > p && g_ascii_isspace(*(end - 1))) { + end--; } if (end > p + 2) { if (*(end - 1) == '=') { - padeqsign ++; - end --; + padeqsign++; + end--; } if (*(end - 1) == '=') { - padeqsign ++; - end --; + padeqsign++; + end--; } } @@ -282,35 +282,35 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, while (p < end) { if (*p == ' ') { - nspaces ++; + nspaces++; } else if (*p == '=') { b64_chars = FALSE; /* Eqsign must not be inside base64 */ - neqsign ++; - p ++; + neqsign++; + p++; - if (p + 2 < end && g_ascii_isxdigit (*p) && g_ascii_isxdigit (*(p + 1))) { - p ++; - nqpencoded ++; + 
if (p + 2 < end && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*(p + 1))) { + p++; + nqpencoded++; } continue; } else if (*p >= 0x80) { - n8bit ++; + n8bit++; b64_chars = FALSE; } - else if (!(g_ascii_isalnum (*p) || *p == '/' || *p == '+')) { + else if (!(g_ascii_isalnum(*p) || *p == '/' || *p == '+')) { b64_chars = FALSE; } - else if (g_ascii_isupper (*p)) { - nupper ++; + else if (g_ascii_isupper(*p)) { + nupper++; } - else if (g_ascii_islower (*p)) { - nlower ++; + else if (g_ascii_islower(*p)) { + nlower++; } - p ++; + p++; } if (b64_chars && neqsign <= 2 && nspaces == 0) { @@ -342,7 +342,7 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, } else { - if (((end - (const guchar *)part->raw_data.begin) + padeqsign) % 4 == 0) { + if (((end - (const guchar *) part->raw_data.begin) + padeqsign) % 4 == 0) { if (padeqsign == 0) { /* * It can be either base64 or plain text, hard to say @@ -355,7 +355,6 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, else { ret = RSPAMD_CTE_7BIT; } - } else { ret = RSPAMD_CTE_B64; @@ -384,16 +383,16 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task, ret = RSPAMD_CTE_8BIT; } - msg_debug_mime ("detected cte: %s", rspamd_cte_to_string (ret)); + msg_debug_mime("detected cte: %s", rspamd_cte_to_string(ret)); return ret; } static void -rspamd_mime_part_get_cte (struct rspamd_task *task, - struct rspamd_mime_headers_table *hdrs, - struct rspamd_mime_part *part, - gboolean apply_heuristic) +rspamd_mime_part_get_cte(struct rspamd_task *task, + struct rspamd_mime_headers_table *hdrs, + struct rspamd_mime_part *part, + gboolean apply_heuristic) { struct rspamd_mime_header *hdr, *cur; enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN; @@ -403,7 +402,7 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, if (hdr == NULL) { if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN && - !(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) { + !(part->parent_part->flags & 
RSPAMD_MIME_PART_MISSING_CTE)) { part->cte = part->parent_part->cte; parent_propagated = TRUE; @@ -411,21 +410,22 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, } if (apply_heuristic) { - part->cte = rspamd_mime_part_get_cte_heuristic (task, part); - msg_info_task ("detected missing CTE for part as: %s", - rspamd_cte_to_string (part->cte)); + part->cte = rspamd_mime_part_get_cte_heuristic(task, part); + msg_info_task("detected missing CTE for part as: %s", + rspamd_cte_to_string(part->cte)); } part->flags |= RSPAMD_MIME_PART_MISSING_CTE; } else { - DL_FOREACH (hdr, cur) { + DL_FOREACH(hdr, cur) + { gsize hlen; gchar lc_buf[128]; - hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", cur->value); - rspamd_str_lc (lc_buf, hlen); - cte = rspamd_mime_parse_cte (lc_buf, hlen); + hlen = rspamd_snprintf(lc_buf, sizeof(lc_buf), "%s", cur->value); + rspamd_str_lc(lc_buf, hlen); + cte = rspamd_mime_parse_cte(lc_buf, hlen); if (cte != RSPAMD_CTE_UNKNOWN) { part->cte = cte; @@ -433,45 +433,45 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, } } -check_cte: + check_cte: if (apply_heuristic) { if (part->cte == RSPAMD_CTE_UNKNOWN) { - part->cte = rspamd_mime_part_get_cte_heuristic (task, part); + part->cte = rspamd_mime_part_get_cte_heuristic(task, part); - msg_info_task ("corrected bad CTE for part to: %s", - rspamd_cte_to_string (part->cte)); + msg_info_task("corrected bad CTE for part to: %s", + rspamd_cte_to_string(part->cte)); } else if (part->cte == RSPAMD_CTE_B64 || - part->cte == RSPAMD_CTE_QP) { + part->cte == RSPAMD_CTE_QP) { /* Additionally check sanity */ - cte = rspamd_mime_part_get_cte_heuristic (task, part); + cte = rspamd_mime_part_get_cte_heuristic(task, part); if (cte == RSPAMD_CTE_8BIT) { - msg_info_task ( - "incorrect cte specified for part: %s, %s detected", - rspamd_cte_to_string (part->cte), - rspamd_cte_to_string (cte)); + msg_info_task( + "incorrect cte specified for part: %s, %s detected", + rspamd_cte_to_string(part->cte), + 
rspamd_cte_to_string(cte)); part->cte = cte; part->flags |= RSPAMD_MIME_PART_BAD_CTE; } else if (cte != part->cte && parent_propagated) { part->cte = cte; - msg_info_task ("detected missing CTE for part as: %s", - rspamd_cte_to_string (part->cte)); + msg_info_task("detected missing CTE for part as: %s", + rspamd_cte_to_string(part->cte)); } } else { - msg_debug_mime ("processed cte: %s", - rspamd_cte_to_string (cte)); + msg_debug_mime("processed cte: %s", + rspamd_cte_to_string(cte)); } } else { - msg_debug_mime ("processed cte: %s", rspamd_cte_to_string (cte)); + msg_debug_mime("processed cte: %s", rspamd_cte_to_string(cte)); } } } static void -rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part) +rspamd_mime_part_get_cd(struct rspamd_task *task, struct rspamd_mime_part *part) { struct rspamd_mime_header *hdr, *cur; struct rspamd_content_disposition *cd = NULL; @@ -479,62 +479,63 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part struct rspamd_content_type_param *found; hdr = rspamd_message_get_header_from_hash(part->raw_headers, - "Content-Disposition", FALSE); + "Content-Disposition", FALSE); if (hdr == NULL) { - cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd)); + cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*cd)); cd->type = RSPAMD_CT_INLINE; /* We can also have content disposition definitions in Content-Type */ if (part->ct && part->ct->attrs) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "name"); + found = g_hash_table_lookup(part->ct->attrs, &srch); if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "filename"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "filename"); + found = g_hash_table_lookup(part->ct->attrs, &srch); } if (found) { cd->type = RSPAMD_CT_ATTACHMENT; - memcpy (&cd->filename, &found->value, sizeof (cd->filename)); + memcpy(&cd->filename, &found->value, 
sizeof(cd->filename)); } } } else { - DL_FOREACH (hdr, cur) { + DL_FOREACH(hdr, cur) + { gsize hlen; cd = NULL; if (cur->value) { - hlen = strlen (cur->value); - cd = rspamd_content_disposition_parse (cur->value, hlen, - task->task_pool); + hlen = strlen(cur->value); + cd = rspamd_content_disposition_parse(cur->value, hlen, + task->task_pool); } if (cd) { /* We still need to check filename */ if (cd->filename.len == 0) { if (part->ct && part->ct->attrs) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "name"); + found = g_hash_table_lookup(part->ct->attrs, &srch); if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "filename"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "filename"); + found = g_hash_table_lookup(part->ct->attrs, &srch); } if (found) { cd->type = RSPAMD_CT_ATTACHMENT; - memcpy (&cd->filename, &found->value, - sizeof (cd->filename)); + memcpy(&cd->filename, &found->value, + sizeof(cd->filename)); } } } - msg_debug_mime ("processed content disposition: %s, file: \"%T\"", - cd->lc_data, &cd->filename); + msg_debug_mime("processed content disposition: %s, file: \"%T\"", + cd->lc_data, &cd->filename); break; } else if (part->ct) { @@ -542,22 +543,22 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part * Even in case of malformed Content-Disposition, we can still * fall back to Content-Type */ - cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd)); + cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*cd)); cd->type = RSPAMD_CT_INLINE; /* We can also have content disposition definitions in Content-Type */ if (part->ct->attrs) { - RSPAMD_FTOK_ASSIGN (&srch, "name"); - found = g_hash_table_lookup (part->ct->attrs, &srch); + RSPAMD_FTOK_ASSIGN(&srch, "name"); + found = g_hash_table_lookup(part->ct->attrs, &srch); if (!found) { - RSPAMD_FTOK_ASSIGN (&srch, "filename"); - found = g_hash_table_lookup (part->ct->attrs, 
&srch); + RSPAMD_FTOK_ASSIGN(&srch, "filename"); + found = g_hash_table_lookup(part->ct->attrs, &srch); } if (found) { cd->type = RSPAMD_CT_ATTACHMENT; - memcpy (&cd->filename, &found->value, sizeof (cd->filename)); + memcpy(&cd->filename, &found->value, sizeof(cd->filename)); } } } @@ -567,43 +568,98 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part part->cd = cd; } -void -rspamd_mime_parser_calc_digest (struct rspamd_mime_part *part) +void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part) { /* Blake2b applied to string 'rspamd' */ static const guchar hash_key[] = { - 0xef,0x43,0xae,0x80,0xcc,0x8d,0xc3,0x4c, - 0x6f,0x1b,0xd6,0x18,0x1b,0xae,0x87,0x74, - 0x0c,0xca,0xf7,0x8e,0x5f,0x2e,0x54,0x32, - 0xf6,0x79,0xb9,0x27,0x26,0x96,0x20,0x92, - 0x70,0x07,0x85,0xeb,0x83,0xf7,0x89,0xe0, - 0xd7,0x32,0x2a,0xd2,0x1a,0x64,0x41,0xef, - 0x49,0xff,0xc3,0x8c,0x54,0xf9,0x67,0x74, - 0x30,0x1e,0x70,0x2e,0xb7,0x12,0x09,0xfe, + 0xef, + 0x43, + 0xae, + 0x80, + 0xcc, + 0x8d, + 0xc3, + 0x4c, + 0x6f, + 0x1b, + 0xd6, + 0x18, + 0x1b, + 0xae, + 0x87, + 0x74, + 0x0c, + 0xca, + 0xf7, + 0x8e, + 0x5f, + 0x2e, + 0x54, + 0x32, + 0xf6, + 0x79, + 0xb9, + 0x27, + 0x26, + 0x96, + 0x20, + 0x92, + 0x70, + 0x07, + 0x85, + 0xeb, + 0x83, + 0xf7, + 0x89, + 0xe0, + 0xd7, + 0x32, + 0x2a, + 0xd2, + 0x1a, + 0x64, + 0x41, + 0xef, + 0x49, + 0xff, + 0xc3, + 0x8c, + 0x54, + 0xf9, + 0x67, + 0x74, + 0x30, + 0x1e, + 0x70, + 0x2e, + 0xb7, + 0x12, + 0x09, + 0xfe, }; if (part->parsed_data.len > 0) { - rspamd_cryptobox_hash (part->digest, - part->parsed_data.begin, part->parsed_data.len, - hash_key, sizeof (hash_key)); + rspamd_cryptobox_hash(part->digest, + part->parsed_data.begin, part->parsed_data.len, + hash_key, sizeof(hash_key)); } } static enum rspamd_mime_parse_error -rspamd_mime_parse_normal_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - struct rspamd_content_type *ct, - GError **err) 
+rspamd_mime_parse_normal_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + struct rspamd_content_type *ct, + GError **err) { rspamd_fstring_t *parsed; gssize r; - g_assert (part != NULL); + g_assert(part != NULL); - rspamd_mime_part_get_cte (task, part->raw_headers, part, - part->ct && !(part->ct->flags & RSPAMD_CONTENT_TYPE_MESSAGE)); - rspamd_mime_part_get_cd (task, part); + rspamd_mime_part_get_cte(task, part->raw_headers, part, + part->ct && !(part->ct->flags & RSPAMD_CONTENT_TYPE_MESSAGE)); + rspamd_mime_part_get_cd(task, part); switch (part->cte) { case RSPAMD_CTE_7BIT: @@ -626,8 +682,8 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, * UTF8, we can still imply Content-Type == text/plain */ - if (rspamd_str_has_8bit (part->raw_data.begin, part->raw_data.len) && - !rspamd_fast_utf8_validate (part->raw_data.begin, part->raw_data.len)) { + if (rspamd_str_has_8bit(part->raw_data.begin, part->raw_data.len) && + !rspamd_fast_utf8_validate(part->raw_data.begin, part->raw_data.len)) { part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT; part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; } @@ -636,14 +692,14 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) { /* Need to copy text as we have couple of in-place change functions */ - parsed = rspamd_fstring_sized_new (part->raw_data.len); + parsed = rspamd_fstring_sized_new(part->raw_data.len); parsed->len = part->raw_data.len; - memcpy (parsed->str, part->raw_data.begin, parsed->len); + memcpy(parsed->str, part->raw_data.begin, parsed->len); part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + 
(rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); } else { part->parsed_data.begin = part->raw_data.begin; @@ -651,138 +707,138 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, } break; case RSPAMD_CTE_QP: - parsed = rspamd_fstring_sized_new (part->raw_data.len); - r = rspamd_decode_qp_buf (part->raw_data.begin, part->raw_data.len, - parsed->str, parsed->allocated); + parsed = rspamd_fstring_sized_new(part->raw_data.len); + r = rspamd_decode_qp_buf(part->raw_data.begin, part->raw_data.len, + parsed->str, parsed->allocated); if (r != -1) { parsed->len = r; part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); } else { - msg_err_task ("invalid quoted-printable encoded part, assume 8bit"); + msg_err_task("invalid quoted-printable encoded part, assume 8bit"); if (part->ct) { part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; } part->cte = RSPAMD_CTE_8BIT; - memcpy (parsed->str, part->raw_data.begin, part->raw_data.len); + memcpy(parsed->str, part->raw_data.begin, part->raw_data.len); parsed->len = part->raw_data.len; part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); } break; case RSPAMD_CTE_B64: - parsed = rspamd_fstring_sized_new (part->raw_data.len / 4 * 3 + 12); - rspamd_cryptobox_base64_decode (part->raw_data.begin, - 
part->raw_data.len, - parsed->str, &parsed->len); + parsed = rspamd_fstring_sized_new(part->raw_data.len / 4 * 3 + 12); + rspamd_cryptobox_base64_decode(part->raw_data.begin, + part->raw_data.len, + parsed->str, &parsed->len); part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); break; case RSPAMD_CTE_UUE: - parsed = rspamd_fstring_sized_new (part->raw_data.len / 4 * 3 + 12); - r = rspamd_decode_uue_buf (part->raw_data.begin, part->raw_data.len, - parsed->str, parsed->allocated); - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, parsed); + parsed = rspamd_fstring_sized_new(part->raw_data.len / 4 * 3 + 12); + r = rspamd_decode_uue_buf(part->raw_data.begin, part->raw_data.len, + parsed->str, parsed->allocated); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed); if (r != -1) { parsed->len = r; part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; } else { - msg_err_task ("invalid uuencoding in encoded part, assume 8bit"); + msg_err_task("invalid uuencoding in encoded part, assume 8bit"); if (part->ct) { part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; } part->cte = RSPAMD_CTE_8BIT; - parsed->len = MIN (part->raw_data.len, parsed->allocated); - memcpy (parsed->str, part->raw_data.begin, parsed->len); - rspamd_mempool_notify_alloc (task->task_pool, parsed->len); + parsed->len = MIN(part->raw_data.len, parsed->allocated); + memcpy(parsed->str, part->raw_data.begin, 
parsed->len); + rspamd_mempool_notify_alloc(task->task_pool, parsed->len); part->parsed_data.begin = parsed->str; part->parsed_data.len = parsed->len; } break; default: - g_assert_not_reached (); + g_assert_not_reached(); } - part->part_number = MESSAGE_FIELD (task, parts)->len; - part->urls = g_ptr_array_new (); - g_ptr_array_add (MESSAGE_FIELD (task, parts), part); - msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte", - &part->ct->type, &part->ct->subtype, part->parsed_data.len, - part->raw_data.len, rspamd_cte_to_string (part->cte)); - rspamd_mime_parser_calc_digest (part); + part->part_number = MESSAGE_FIELD(task, parts)->len; + part->urls = g_ptr_array_new(); + g_ptr_array_add(MESSAGE_FIELD(task, parts), part); + msg_debug_mime("parsed data part %T/%T of length %z (%z orig), %s cte", + &part->ct->type, &part->ct->subtype, part->parsed_data.len, + part->raw_data.len, rspamd_cte_to_string(part->cte)); + rspamd_mime_parser_calc_digest(part); if (ct && (ct->flags & RSPAMD_CONTENT_TYPE_SMIME)) { CMS_ContentInfo *cms; const unsigned char *der_beg = part->parsed_data.begin; - cms = d2i_CMS_ContentInfo (NULL, &der_beg, part->parsed_data.len); + cms = d2i_CMS_ContentInfo(NULL, &der_beg, part->parsed_data.len); if (cms) { - const ASN1_OBJECT *asn_ct = CMS_get0_eContentType (cms); - int ct_nid = OBJ_obj2nid (asn_ct); + const ASN1_OBJECT *asn_ct = CMS_get0_eContentType(cms); + int ct_nid = OBJ_obj2nid(asn_ct); if (ct_nid == NID_pkcs7_data) { - BIO *bio = BIO_new_mem_buf (part->parsed_data.begin, - part->parsed_data.len); + BIO *bio = BIO_new_mem_buf(part->parsed_data.begin, + part->parsed_data.len); PKCS7 *p7; - p7 = d2i_PKCS7_bio (bio, NULL); + p7 = d2i_PKCS7_bio(bio, NULL); if (p7) { - ct_nid = OBJ_obj2nid (p7->type); + ct_nid = OBJ_obj2nid(p7->type); if (ct_nid == NID_pkcs7_signed) { PKCS7 *p7_signed_content = p7->d.sign->contents; - ct_nid = OBJ_obj2nid (p7_signed_content->type); + ct_nid = OBJ_obj2nid(p7_signed_content->type); if (ct_nid == 
NID_pkcs7_data && p7_signed_content->d.data) { int ret; - msg_debug_mime ("found an additional part inside of " - "smime structure of type %T/%T; length=%d", - &ct->type, &ct->subtype, p7_signed_content->d.data->length); + msg_debug_mime("found an additional part inside of " + "smime structure of type %T/%T; length=%d", + &ct->type, &ct->subtype, p7_signed_content->d.data->length); /* * Since ASN.1 structures are freed, we need to copy * the content */ - gchar *cpy = rspamd_mempool_alloc (task->task_pool, - p7_signed_content->d.data->length); - memcpy (cpy, p7_signed_content->d.data->data, - p7_signed_content->d.data->length); - ret = rspamd_mime_process_multipart_node (task, - st, NULL, - cpy,cpy + p7_signed_content->d.data->length, - TRUE, err); - - PKCS7_free (p7); - BIO_free (bio); - CMS_ContentInfo_free (cms); + gchar *cpy = rspamd_mempool_alloc(task->task_pool, + p7_signed_content->d.data->length); + memcpy(cpy, p7_signed_content->d.data->data, + p7_signed_content->d.data->length); + ret = rspamd_mime_process_multipart_node(task, + st, NULL, + cpy, cpy + p7_signed_content->d.data->length, + TRUE, err); + + PKCS7_free(p7); + BIO_free(bio); + CMS_ContentInfo_free(cms); return ret; } } - PKCS7_free (p7); + PKCS7_free(p7); } - BIO_free (bio); + BIO_free(bio); } - CMS_ContentInfo_free (cms); + CMS_ContentInfo_free(cms); } } @@ -800,12 +856,12 @@ struct rspamd_mime_multipart_cbdata { }; static enum rspamd_mime_parse_error -rspamd_mime_process_multipart_node (struct rspamd_task *task, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_part *multipart, - const gchar *start, const gchar *end, - gboolean is_finished, - GError **err) +rspamd_mime_process_multipart_node(struct rspamd_task *task, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_part *multipart, + const gchar *start, const gchar *end, + gboolean is_finished, + GError **err) { struct rspamd_content_type *ct, *sel = NULL; struct rspamd_mime_header *hdr = NULL, *cur; @@ -815,7 +871,7 @@ 
rspamd_mime_process_multipart_node (struct rspamd_task *task, enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL; - str.str = (gchar *)start; + str.str = (gchar *) start; str.len = end - start; if (*start == '\n' || *start == '\r') { @@ -833,11 +889,11 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, gboolean seen_something = FALSE; while (p < end) { - if (g_ascii_isalnum (*p)) { + if (g_ascii_isalnum(*p)) { seen_something = TRUE; break; } - p ++; + p++; } if (!seen_something) { @@ -846,21 +902,21 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, } } else { - hdr_pos = rspamd_string_find_eoh (&str, &body_pos); + hdr_pos = rspamd_string_find_eoh(&str, &body_pos); } - npart = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_part)); + npart = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_part)); npart->parent_part = multipart; - npart->raw_headers = rspamd_message_headers_new (); + npart->raw_headers = rspamd_message_headers_new(); npart->headers_order = NULL; if (multipart) { if (multipart->specific.mp->children == NULL) { - multipart->specific.mp->children = g_ptr_array_sized_new (2); + multipart->specific.mp->children = g_ptr_array_sized_new(2); } - g_ptr_array_add (multipart->specific.mp->children, npart); + g_ptr_array_add(multipart->specific.mp->children, npart); } if (hdr_pos > 0 && hdr_pos < str.len) { @@ -870,21 +926,20 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, npart->raw_data.len = (end - start) - body_pos; if (npart->raw_headers_len > 0) { - rspamd_mime_headers_process (task, npart->raw_headers, - &npart->headers_order, - npart->raw_headers_str, - npart->raw_headers_len, - FALSE); + rspamd_mime_headers_process(task, npart->raw_headers, + &npart->headers_order, + npart->raw_headers_str, + npart->raw_headers_len, + FALSE); /* Preserve the natural order */ if (npart->headers_order) { - LL_REVERSE2 (npart->headers_order, ord_next); + 
LL_REVERSE2(npart->headers_order, ord_next); } } hdr = rspamd_message_get_header_from_hash(npart->raw_headers, - "Content-Type", FALSE); - + "Content-Type", FALSE); } else { npart->raw_headers_str = 0; @@ -896,9 +951,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, if (hdr != NULL) { - DL_FOREACH (hdr, cur) { - ct = rspamd_content_type_parse (cur->value, strlen (cur->value), - task->task_pool); + DL_FOREACH(hdr, cur) + { + ct = rspamd_content_type_parse(cur->value, strlen(cur->value), + task->task_pool); /* Here we prefer multipart content-type or any content-type */ if (ct) { @@ -913,46 +969,45 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task, } if (sel == NULL) { - sel = rspamd_mempool_alloc0 (task->task_pool, sizeof (*sel)); - RSPAMD_FTOK_ASSIGN (&sel->type, "text"); - RSPAMD_FTOK_ASSIGN (&sel->subtype, "plain"); + sel = rspamd_mempool_alloc0(task->task_pool, sizeof(*sel)); + RSPAMD_FTOK_ASSIGN(&sel->type, "text"); + RSPAMD_FTOK_ASSIGN(&sel->subtype, "plain"); } npart->ct = sel; if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) { - st->nesting ++; - g_ptr_array_add (st->stack, npart); + st->nesting++; + g_ptr_array_add(st->stack, npart); npart->part_type = RSPAMD_MIME_PART_MULTIPART; - npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_multipart)); - memcpy (&npart->specific.mp->boundary, &sel->orig_boundary, - sizeof (rspamd_ftok_t)); - ret = rspamd_mime_parse_multipart_part (task, npart, st, err); + npart->specific.mp = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_multipart)); + memcpy(&npart->specific.mp->boundary, &sel->orig_boundary, + sizeof(rspamd_ftok_t)); + ret = rspamd_mime_parse_multipart_part(task, npart, st, err); } else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) { - st->nesting ++; - g_ptr_array_add (st->stack, npart); + st->nesting++; + g_ptr_array_add(st->stack, npart); npart->part_type = RSPAMD_MIME_PART_MESSAGE; - if ((ret = 
rspamd_mime_parse_normal_part (task, npart, st, sel, err)) - == RSPAMD_MIME_PARSE_OK) { - ret = rspamd_mime_parse_message (task, npart, st, err); + if ((ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err)) == RSPAMD_MIME_PARSE_OK) { + ret = rspamd_mime_parse_message(task, npart, st, err); } } else { - ret = rspamd_mime_parse_normal_part (task, npart, st, sel, err); + ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err); } return ret; } static enum rspamd_mime_parse_error -rspamd_mime_parse_multipart_cb (struct rspamd_task *task, - struct rspamd_mime_part *multipart, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_multipart_cbdata *cb, - struct rspamd_mime_boundary *b) +rspamd_mime_parse_multipart_cb(struct rspamd_task *task, + struct rspamd_mime_part *multipart, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_multipart_cbdata *cb, + struct rspamd_mime_boundary *b) { const gchar *pos = st->start + b->boundary; enum rspamd_mime_parse_error ret; @@ -971,9 +1026,8 @@ rspamd_mime_parse_multipart_cb (struct rspamd_task *task, */ if (cb->part_start < pos && cb->cur_boundary) { - if ((ret = rspamd_mime_process_multipart_node (task, cb->st, - cb->multipart, cb->part_start, pos, TRUE, cb->err)) - != RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_process_multipart_node(task, cb->st, + cb->multipart, cb->part_start, pos, TRUE, cb->err)) != RSPAMD_MIME_PARSE_OK) { return ret; } @@ -992,10 +1046,10 @@ rspamd_mime_parse_multipart_cb (struct rspamd_task *task, } static enum rspamd_mime_parse_error -rspamd_multipart_boundaries_filter (struct rspamd_task *task, - struct rspamd_mime_part *multipart, - struct rspamd_mime_parser_ctx *st, - struct rspamd_mime_multipart_cbdata *cb) +rspamd_multipart_boundaries_filter(struct rspamd_task *task, + struct rspamd_mime_part *multipart, + struct rspamd_mime_parser_ctx *st, + struct rspamd_mime_multipart_cbdata *cb) { struct rspamd_mime_boundary *cur; goffset last_offset; @@ -1003,17 +1057,17 @@ 
rspamd_multipart_boundaries_filter (struct rspamd_task *task, enum rspamd_mime_parse_error ret; last_offset = (multipart->raw_data.begin - st->start) + - multipart->raw_data.len; + multipart->raw_data.len; /* Find the first offset suitable for this part */ - for (i = 0; i < st->boundaries->len; i ++) { - cur = &g_array_index (st->boundaries, struct rspamd_mime_boundary, i); + for (i = 0; i < st->boundaries->len; i++) { + cur = &g_array_index(st->boundaries, struct rspamd_mime_boundary, i); if (cur->start >= multipart->raw_data.begin - st->start) { if (cb->cur_boundary) { /* Check boundary */ - msg_debug_mime ("compare %L and %L (and %L)", - cb->bhash, cur->hash, cur->closed_hash); + msg_debug_mime("compare %L and %L (and %L)", + cb->bhash, cur->hash, cur->closed_hash); if (cb->bhash == cur->hash) { sel = i; @@ -1029,8 +1083,8 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, } else { /* Set current boundary */ - cb->cur_boundary = rspamd_mempool_alloc (task->task_pool, - sizeof (rspamd_ftok_t)); + cb->cur_boundary = rspamd_mempool_alloc(task->task_pool, + sizeof(rspamd_ftok_t)); cb->cur_boundary->begin = st->start + cur->boundary; cb->cur_boundary->len = 0; cb->bhash = cur->hash; @@ -1041,16 +1095,16 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, } /* Now we can go forward with boundaries that are same to what we have */ - for (i = sel; i < st->boundaries->len; i ++) { - cur = &g_array_index (st->boundaries, struct rspamd_mime_boundary, i); + for (i = sel; i < st->boundaries->len; i++) { + cur = &g_array_index(st->boundaries, struct rspamd_mime_boundary, i); if (cur->boundary > last_offset) { break; } if (cur->hash == cb->bhash || cur->closed_hash == cb->bhash) { - if ((ret = rspamd_mime_parse_multipart_cb (task, multipart, st, - cb, cur)) != RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_parse_multipart_cb(task, multipart, st, + cb, cur)) != RSPAMD_MIME_PARSE_OK) { return ret; } @@ -1060,11 +1114,11 @@ 
rspamd_multipart_boundaries_filter (struct rspamd_task *task, cur->hash = cur->closed_hash; } - if (RSPAMD_BOUNDARY_IS_CLOSED (cur)) { + if (RSPAMD_BOUNDARY_IS_CLOSED(cur)) { /* We also might check the next boundary... */ if (i < st->boundaries->len - 1) { - cur = &g_array_index (st->boundaries, - struct rspamd_mime_boundary, i + 1); + cur = &g_array_index(st->boundaries, + struct rspamd_mime_boundary, i + 1); if (cur->hash == cb->bhash) { continue; @@ -1089,8 +1143,8 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, fb.boundary = last_offset; fb.start = -1; - if ((ret = rspamd_mime_parse_multipart_cb (task, multipart, st, - cb, &fb)) != RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_parse_multipart_cb(task, multipart, st, + cb, &fb)) != RSPAMD_MIME_PARSE_OK) { return ret; } } @@ -1099,25 +1153,25 @@ rspamd_multipart_boundaries_filter (struct rspamd_task *task, } static enum rspamd_mime_parse_error -rspamd_mime_parse_multipart_part (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err) +rspamd_mime_parse_multipart_part(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err) { struct rspamd_mime_multipart_cbdata cbdata; enum rspamd_mime_parse_error ret; if (st->nesting > max_nested) { - g_set_error (err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", - st->nesting); + g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", + st->nesting); return RSPAMD_MIME_PARSE_NESTING; } - part->part_number = MESSAGE_FIELD (task, parts)->len; - part->urls = g_ptr_array_new (); - g_ptr_array_add (MESSAGE_FIELD (task, parts), part); - st->nesting ++; - rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE); + part->part_number = MESSAGE_FIELD(task, parts)->len; + part->urls = g_ptr_array_new(); + g_ptr_array_add(MESSAGE_FIELD(task, parts), part); + st->nesting++; + rspamd_mime_part_get_cte(task, 
part->raw_headers, part, FALSE); st->pos = part->raw_data.begin; cbdata.multipart = part; @@ -1129,10 +1183,10 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, if (part->ct->boundary.len > 0) { /* We know our boundary */ cbdata.cur_boundary = &part->ct->boundary; - rspamd_cryptobox_siphash ((guchar *)&cbdata.bhash, - cbdata.cur_boundary->begin, cbdata.cur_boundary->len, - lib_ctx->hkey); - msg_debug_mime ("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash); + rspamd_cryptobox_siphash((guchar *) &cbdata.bhash, + cbdata.cur_boundary->begin, cbdata.cur_boundary->len, + lib_ctx->hkey); + msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash); } else { /* Guess boundary */ @@ -1140,23 +1194,23 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task, cbdata.bhash = 0; } - ret = rspamd_multipart_boundaries_filter (task, part, st, &cbdata); + ret = rspamd_multipart_boundaries_filter(task, part, st, &cbdata); /* Cleanup stack */ - st->nesting --; - g_ptr_array_remove_index_fast (st->stack, st->stack->len - 1); + st->nesting--; + g_ptr_array_remove_index_fast(st->stack, st->stack->len - 1); return ret; } /* Process boundary like structures in a message */ static gint -rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, - guint strnum, - gint match_start, - gint match_pos, - const gchar *text, - gsize len, - void *context) +rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp, + guint strnum, + gint match_start, + gint match_pos, + const gchar *text, + gsize len, + void *context) { const gchar *end = text + len, *p = text + match_pos, *bend; gsize blen; @@ -1167,7 +1221,7 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, task = st->task; - if (G_LIKELY (p < end)) { + if (G_LIKELY(p < end)) { blen = 0; @@ -1176,8 +1230,8 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, break; } - blen ++; - p ++; + blen++; + p++; } if (blen > 0) { @@ -1189,34 +1243,34 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern 
*mp, /* We need to verify last -- */ if (bend > p + 1 && *(bend - 1) == '-') { closing = TRUE; - bend --; + bend--; blen -= 2; } else { /* Not a closing boundary somehow, e.g. if a boundary=='-' */ - bend ++; + bend++; } } else { - bend ++; + bend++; } while (bend < end) { if (*bend == '\r') { - bend ++; + bend++; /* \r\n */ if (bend < end && *bend == '\n') { - bend ++; + bend++; } } else if (*bend == '\n') { /* \n */ - bend ++; + bend++; } - else if (g_ascii_isspace (*bend)){ + else if (g_ascii_isspace(*bend)) { /* Spaces in the same line, skip them */ - bend ++; + bend++; continue; } @@ -1233,32 +1287,32 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, lc_copy = lc_copy_buf; } else { - lc_copy = g_malloc (blen + 2); + lc_copy = g_malloc(blen + 2); } if (closing) { - memcpy (lc_copy, p, blen + 2); - rspamd_str_lc (lc_copy, blen + 2); + memcpy(lc_copy, p, blen + 2); + rspamd_str_lc(lc_copy, blen + 2); } else { - memcpy (lc_copy, p, blen); - rspamd_str_lc (lc_copy, blen); + memcpy(lc_copy, p, blen); + rspamd_str_lc(lc_copy, blen); } - rspamd_cryptobox_siphash ((guchar *)&b.hash, lc_copy, blen, - lib_ctx->hkey); - msg_debug_mime ("normal hash: %*s -> %L, %d boffset, %d data offset", - (gint)blen, lc_copy, b.hash, (int)b.boundary, (int)b.start); + rspamd_cryptobox_siphash((guchar *) &b.hash, lc_copy, blen, + lib_ctx->hkey); + msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset", + (gint) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start); if (closing) { b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED; - rspamd_cryptobox_siphash ((guchar *)&b.closed_hash, lc_copy, - blen + 2, - lib_ctx->hkey); - msg_debug_mime ("closing hash: %*s -> %L, %d boffset, %d data offset", - (gint)blen + 2, lc_copy, - b.closed_hash, - (int)b.boundary, (int)b.start); + rspamd_cryptobox_siphash((guchar *) &b.closed_hash, lc_copy, + blen + 2, + lib_ctx->hkey); + msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset", + (gint) blen + 2, lc_copy, + 
b.closed_hash, + (int) b.boundary, (int) b.start); } else { b.flags = 0; @@ -1269,7 +1323,7 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, if (blen + 2 >= sizeof(lc_copy_buf)) { g_free(lc_copy); } - g_array_append_val (st->boundaries, b); + g_array_append_val(st->boundaries, b); } } @@ -1277,10 +1331,10 @@ rspamd_mime_preprocess_cb (struct rspamd_multipattern *mp, } static goffset -rspamd_mime_parser_headers_heuristic (GString *input, goffset *body_start) +rspamd_mime_parser_headers_heuristic(GString *input, goffset *body_start) { const gsize default_max_len = 76; - gsize max_len = MIN (input->len, default_max_len); + gsize max_len = MIN(input->len, default_max_len); const gchar *p, *end; enum { st_before_colon = 0, @@ -1296,29 +1350,29 @@ rspamd_mime_parser_headers_heuristic (GString *input, goffset *body_start) while (p < end) { switch (state) { case st_before_colon: - if (G_UNLIKELY (*p == ':')) { + if (G_UNLIKELY(*p == ':')) { state = st_colon; } - else if (G_UNLIKELY (!g_ascii_isgraph (*p))) { + else if (G_UNLIKELY(!g_ascii_isgraph(*p))) { state = st_error; } - p ++; + p++; break; case st_colon: - if (g_ascii_isspace (*p)) { + if (g_ascii_isspace(*p)) { state = st_spaces_after_colon; } else { state = st_value; } - p ++; + p++; break; case st_spaces_after_colon: - if (!g_ascii_isspace (*p)) { + if (!g_ascii_isspace(*p)) { state = st_value; } - p ++; + p++; break; case st_value: /* We accept any value */ @@ -1343,40 +1397,40 @@ end: } static void -rspamd_mime_preprocess_message (struct rspamd_task *task, - struct rspamd_mime_part *top, - struct rspamd_mime_parser_ctx *st) +rspamd_mime_preprocess_message(struct rspamd_task *task, + struct rspamd_mime_part *top, + struct rspamd_mime_parser_ctx *st) { if (top->raw_data.begin >= st->pos) { - rspamd_multipattern_lookup (lib_ctx->mp_boundary, - top->raw_data.begin - 1, - top->raw_data.len + 1, - rspamd_mime_preprocess_cb, st, NULL); + rspamd_multipattern_lookup(lib_ctx->mp_boundary, + 
top->raw_data.begin - 1, + top->raw_data.len + 1, + rspamd_mime_preprocess_cb, st, NULL); } else { - rspamd_multipattern_lookup (lib_ctx->mp_boundary, - st->pos, - st->end - st->pos, - rspamd_mime_preprocess_cb, st, NULL); + rspamd_multipattern_lookup(lib_ctx->mp_boundary, + st->pos, + st->end - st->pos, + rspamd_mime_preprocess_cb, st, NULL); } } static void -rspamd_mime_parse_stack_free (struct rspamd_mime_parser_ctx *st) +rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st) { if (st) { - g_ptr_array_free (st->stack, TRUE); - g_array_free (st->boundaries, TRUE); - g_free (st); + g_ptr_array_free(st->stack, TRUE); + g_array_free(st->boundaries, TRUE); + g_free(st); } } static enum rspamd_mime_parse_error -rspamd_mime_parse_message (struct rspamd_task *task, - struct rspamd_mime_part *part, - struct rspamd_mime_parser_ctx *st, - GError **err) +rspamd_mime_parse_message(struct rspamd_task *task, + struct rspamd_mime_part *part, + struct rspamd_mime_parser_ctx *st, + GError **err) { struct rspamd_content_type *ct, *sel = NULL; struct rspamd_mime_header *hdr = NULL, *cur; @@ -1390,80 +1444,80 @@ rspamd_mime_parse_message (struct rspamd_task *task, struct rspamd_mime_parser_ctx *nst = st; if (st->nesting > max_nested) { - g_set_error (err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", - st->nesting); + g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d", + st->nesting); return RSPAMD_MIME_PARSE_NESTING; } /* Allocate real part */ - npart = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_part)); + npart = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_part)); if (part == NULL) { /* Top level message */ p = task->msg.begin; len = task->msg.len; - str.str = (gchar *)p; + str.str = (gchar *) p; str.len = len; - hdr_pos = rspamd_string_find_eoh (&str, &body_pos); + hdr_pos = rspamd_string_find_eoh(&str, &body_pos); if (hdr_pos > 0 && hdr_pos < str.len) { - MESSAGE_FIELD (task, 
raw_headers_content).begin = str.str; - MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; - MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + body_pos; + MESSAGE_FIELD(task, raw_headers_content).begin = str.str; + MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD(task, raw_headers_content).body_start = str.str + body_pos; - if (MESSAGE_FIELD (task, raw_headers_content).len > 0) { - rspamd_mime_headers_process (task, - MESSAGE_FIELD (task, raw_headers), - &MESSAGE_FIELD (task, headers_order), - MESSAGE_FIELD (task, raw_headers_content).begin, - MESSAGE_FIELD (task, raw_headers_content).len, - TRUE); - npart->raw_headers = rspamd_message_headers_ref ( - MESSAGE_FIELD (task, raw_headers)); + if (MESSAGE_FIELD(task, raw_headers_content).len > 0) { + rspamd_mime_headers_process(task, + MESSAGE_FIELD(task, raw_headers), + &MESSAGE_FIELD(task, headers_order), + MESSAGE_FIELD(task, raw_headers_content).begin, + MESSAGE_FIELD(task, raw_headers_content).len, + TRUE); + npart->raw_headers = rspamd_message_headers_ref( + MESSAGE_FIELD(task, raw_headers)); /* Preserve the natural order */ - if (MESSAGE_FIELD (task, headers_order)) { - LL_REVERSE2 (MESSAGE_FIELD (task, headers_order), ord_next); + if (MESSAGE_FIELD(task, headers_order)) { + LL_REVERSE2(MESSAGE_FIELD(task, headers_order), ord_next); } } hdr = rspamd_message_get_header_from_hash( - MESSAGE_FIELD (task, raw_headers), - "Content-Type", FALSE); + MESSAGE_FIELD(task, raw_headers), + "Content-Type", FALSE); } else { /* First apply heuristic, maybe we have just headers */ - hdr_pos = rspamd_mime_parser_headers_heuristic (&str, &body_pos); + hdr_pos = rspamd_mime_parser_headers_heuristic(&str, &body_pos); if (hdr_pos > 0 && hdr_pos <= str.len) { - MESSAGE_FIELD (task, raw_headers_content).begin = str.str; - MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos; - MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + - body_pos; - - if (MESSAGE_FIELD (task, 
raw_headers_content).len > 0) { - rspamd_mime_headers_process (task, - MESSAGE_FIELD (task, raw_headers), - &MESSAGE_FIELD (task, headers_order), - MESSAGE_FIELD (task, raw_headers_content).begin, - MESSAGE_FIELD (task, raw_headers_content).len, - TRUE); - npart->raw_headers = rspamd_message_headers_ref ( - MESSAGE_FIELD (task, raw_headers)); + MESSAGE_FIELD(task, raw_headers_content).begin = str.str; + MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos; + MESSAGE_FIELD(task, raw_headers_content).body_start = str.str + + body_pos; + + if (MESSAGE_FIELD(task, raw_headers_content).len > 0) { + rspamd_mime_headers_process(task, + MESSAGE_FIELD(task, raw_headers), + &MESSAGE_FIELD(task, headers_order), + MESSAGE_FIELD(task, raw_headers_content).begin, + MESSAGE_FIELD(task, raw_headers_content).len, + TRUE); + npart->raw_headers = rspamd_message_headers_ref( + MESSAGE_FIELD(task, raw_headers)); /* Preserve the natural order */ - if (MESSAGE_FIELD (task, headers_order)) { - LL_REVERSE2 (MESSAGE_FIELD (task, headers_order), ord_next); + if (MESSAGE_FIELD(task, headers_order)) { + LL_REVERSE2(MESSAGE_FIELD(task, headers_order), ord_next); } } hdr = rspamd_message_get_header_from_hash( - MESSAGE_FIELD (task, raw_headers), - "Content-Type", FALSE); + MESSAGE_FIELD(task, raw_headers), + "Content-Type", FALSE); task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS; } else { @@ -1480,22 +1534,22 @@ rspamd_mime_parse_message (struct rspamd_task *task, * Here are dragons: * We allocate new parser context as we need to shift pointers */ - nst = g_malloc0 (sizeof (*st)); - nst->stack = g_ptr_array_sized_new (4); - nst->boundaries = g_array_sized_new (FALSE, FALSE, - sizeof (struct rspamd_mime_boundary), 8); + nst = g_malloc0(sizeof(*st)); + nst->stack = g_ptr_array_sized_new(4); + nst->boundaries = g_array_sized_new(FALSE, FALSE, + sizeof(struct rspamd_mime_boundary), 8); nst->start = part->parsed_data.begin; nst->end = nst->start + part->parsed_data.len; nst->pos = nst->start; 
nst->task = st->task; nst->nesting = st->nesting; - st->nesting ++; + st->nesting++; - str.str = (gchar *)part->parsed_data.begin; + str.str = (gchar *) part->parsed_data.begin; str.len = part->parsed_data.len; - hdr_pos = rspamd_string_find_eoh (&str, &body_pos); - npart->raw_headers = rspamd_message_headers_new (); + hdr_pos = rspamd_string_find_eoh(&str, &body_pos); + npart->raw_headers = rspamd_message_headers_new(); npart->headers_order = NULL; if (hdr_pos > 0 && hdr_pos < str.len) { @@ -1504,21 +1558,21 @@ rspamd_mime_parse_message (struct rspamd_task *task, npart->raw_data.begin = str.str + body_pos; if (npart->raw_headers_len > 0) { - rspamd_mime_headers_process (task, - npart->raw_headers, - &npart->headers_order, - npart->raw_headers_str, - npart->raw_headers_len, - FALSE); + rspamd_mime_headers_process(task, + npart->raw_headers, + &npart->headers_order, + npart->raw_headers_str, + npart->raw_headers_len, + FALSE); /* Preserve the natural order */ if (npart->headers_order) { - LL_REVERSE2 (npart->headers_order, ord_next); + LL_REVERSE2(npart->headers_order, ord_next); } } hdr = rspamd_message_get_header_from_hash(npart->raw_headers, - "Content-Type", FALSE); + "Content-Type", FALSE); } else { body_pos = 0; @@ -1536,9 +1590,10 @@ rspamd_mime_parse_message (struct rspamd_task *task, sel = NULL; } else { - DL_FOREACH (hdr, cur) { - ct = rspamd_content_type_parse (cur->value, strlen (cur->value), - task->task_pool); + DL_FOREACH(hdr, cur) + { + ct = rspamd_content_type_parse(cur->value, strlen(cur->value), + task->task_pool); /* Here we prefer multipart content-type or any content-type */ if (ct) { @@ -1554,40 +1609,39 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (sel == NULL) { /* For messages we automatically assume plaintext */ - msg_info_task ("cannot find content-type for a message, assume text/plain"); - sel = rspamd_mempool_alloc0 (task->task_pool, sizeof (*sel)); - sel->flags = RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_MISSING; - 
RSPAMD_FTOK_ASSIGN (&sel->type, "text"); - RSPAMD_FTOK_ASSIGN (&sel->subtype, "plain"); + msg_info_task("cannot find content-type for a message, assume text/plain"); + sel = rspamd_mempool_alloc0(task->task_pool, sizeof(*sel)); + sel->flags = RSPAMD_CONTENT_TYPE_TEXT | RSPAMD_CONTENT_TYPE_MISSING; + RSPAMD_FTOK_ASSIGN(&sel->type, "text"); + RSPAMD_FTOK_ASSIGN(&sel->subtype, "plain"); } npart->ct = sel; if ((part == NULL || nst != st) && - (sel->flags & (RSPAMD_CONTENT_TYPE_MULTIPART|RSPAMD_CONTENT_TYPE_MESSAGE))) { + (sel->flags & (RSPAMD_CONTENT_TYPE_MULTIPART | RSPAMD_CONTENT_TYPE_MESSAGE))) { /* Not a trivial message, need to preprocess */ - rspamd_mime_preprocess_message (task, npart, nst); + rspamd_mime_preprocess_message(task, npart, nst); } if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) { - g_ptr_array_add (nst->stack, npart); - nst->nesting ++; + g_ptr_array_add(nst->stack, npart); + nst->nesting++; npart->part_type = RSPAMD_MIME_PART_MULTIPART; - npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_mime_multipart)); - memcpy (&npart->specific.mp->boundary, &sel->orig_boundary, - sizeof (rspamd_ftok_t)); - ret = rspamd_mime_parse_multipart_part (task, npart, nst, err); + npart->specific.mp = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_mime_multipart)); + memcpy(&npart->specific.mp->boundary, &sel->orig_boundary, + sizeof(rspamd_ftok_t)); + ret = rspamd_mime_parse_multipart_part(task, npart, nst, err); } else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) { - if ((ret = rspamd_mime_parse_normal_part (task, npart, nst, sel, err)) - == RSPAMD_MIME_PARSE_OK) { + if ((ret = rspamd_mime_parse_normal_part(task, npart, nst, sel, err)) == RSPAMD_MIME_PARSE_OK) { npart->part_type = RSPAMD_MIME_PART_MESSAGE; - ret = rspamd_mime_parse_message (task, npart, nst, err); + ret = rspamd_mime_parse_message(task, npart, nst, err); } } else { - ret = rspamd_mime_parse_normal_part (task, npart, nst, sel, err); + ret = 
rspamd_mime_parse_normal_part(task, npart, nst, sel, err); } if (ret != RSPAMD_MIME_PARSE_OK) { @@ -1596,28 +1650,28 @@ rspamd_mime_parse_message (struct rspamd_task *task, if (part && st->stack->len > 0) { /* Remove message part from the parent stack */ - g_ptr_array_remove_index_fast (st->stack, st->stack->len - 1); - st->nesting --; + g_ptr_array_remove_index_fast(st->stack, st->stack->len - 1); + st->nesting--; } /* Process leftovers for boundaries */ if (nst->boundaries) { struct rspamd_mime_boundary *boundary, *start_boundary = NULL, - *end_boundary = NULL; + *end_boundary = NULL; goffset cur_offset = nst->pos - nst->start, - end_offset = st->end - st->start; + end_offset = st->end - st->start; guint sel_idx = 0; for (;;) { start_boundary = NULL; for (i = sel_idx; i < nst->boundaries->len; i++) { - boundary = &g_array_index (nst->boundaries, - struct rspamd_mime_boundary, i); + boundary = &g_array_index(nst->boundaries, + struct rspamd_mime_boundary, i); if (boundary->start > cur_offset && boundary->boundary < end_offset && - !RSPAMD_BOUNDARY_IS_CLOSED (boundary)) { + !RSPAMD_BOUNDARY_IS_CLOSED(boundary)) { start_boundary = boundary; sel_idx = i; break; @@ -1628,24 +1682,24 @@ rspamd_mime_parse_message (struct rspamd_task *task, const gchar *start, *end; if (nst->boundaries->len > sel_idx + 1) { - end_boundary = &g_array_index (nst->boundaries, - struct rspamd_mime_boundary, sel_idx + 1); + end_boundary = &g_array_index(nst->boundaries, + struct rspamd_mime_boundary, sel_idx + 1); end = nst->start + end_boundary->boundary; } else { end = nst->end; } - sel_idx ++; + sel_idx++; start = nst->start + start_boundary->start; if (end > start && - (ret = rspamd_mime_process_multipart_node (task, nst, - NULL, start, end, FALSE, err)) != RSPAMD_MIME_PARSE_OK) { + (ret = rspamd_mime_process_multipart_node(task, nst, + NULL, start, end, FALSE, err)) != RSPAMD_MIME_PARSE_OK) { if (nst != st) { - rspamd_mime_parse_stack_free (nst); + rspamd_mime_parse_stack_free(nst); } if 
(ret == RSPAMD_MIME_PARSE_NO_PART) { @@ -1662,34 +1716,34 @@ rspamd_mime_parse_message (struct rspamd_task *task, } if (nst != st) { - rspamd_mime_parse_stack_free (nst); + rspamd_mime_parse_stack_free(nst); } return ret; } enum rspamd_mime_parse_error -rspamd_mime_parse_task (struct rspamd_task *task, GError **err) +rspamd_mime_parse_task(struct rspamd_task *task, GError **err) { struct rspamd_mime_parser_ctx *st; enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK; if (lib_ctx == NULL) { - rspamd_mime_parser_init_lib (); + rspamd_mime_parser_init_lib(); } if (++lib_ctx->key_usages > max_key_usages) { /* Regenerate siphash key */ - ottery_rand_bytes (lib_ctx->hkey, sizeof (lib_ctx->hkey)); + ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); lib_ctx->key_usages = 0; } - st = g_malloc0 (sizeof (*st)); - st->stack = g_ptr_array_sized_new (4); - st->pos = MESSAGE_FIELD (task, raw_headers_content).body_start; + st = g_malloc0(sizeof(*st)); + st->stack = g_ptr_array_sized_new(4); + st->pos = MESSAGE_FIELD(task, raw_headers_content).body_start; st->end = task->msg.begin + task->msg.len; - st->boundaries = g_array_sized_new (FALSE, FALSE, - sizeof (struct rspamd_mime_boundary), 8); + st->boundaries = g_array_sized_new(FALSE, FALSE, + sizeof(struct rspamd_mime_boundary), 8); st->task = task; if (st->pos == NULL) { @@ -1697,8 +1751,8 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err) } st->start = task->msg.begin; - ret = rspamd_mime_parse_message (task, NULL, st, err); - rspamd_mime_parse_stack_free (st); + ret = rspamd_mime_parse_message(task, NULL, st, err); + rspamd_mime_parse_stack_free(st); return ret; } diff --git a/src/libmime/mime_parser.h b/src/libmime/mime_parser.h index ed3fe5f87..aa77b2b30 100644 --- a/src/libmime/mime_parser.h +++ b/src/libmime/mime_parser.h @@ -19,7 +19,7 @@ #include "config.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -33,13 +33,13 @@ enum rspamd_mime_parse_error { RSPAMD_MIME_PARSE_NO_PART, }; 
-enum rspamd_mime_parse_error rspamd_mime_parse_task (struct rspamd_task *task, - GError **err); +enum rspamd_mime_parse_error rspamd_mime_parse_task(struct rspamd_task *task, + GError **err); -void rspamd_mime_parser_calc_digest (struct rspamd_mime_part *part); +void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/mime_string.cxx b/src/libmime/mime_string.cxx index a93ce199c..e818e6412 100644 --- a/src/libmime/mime_string.cxx +++ b/src/libmime/mime_string.cxx @@ -19,133 +19,149 @@ #include "mime_string.hxx" #include "unicode/uchar.h" -TEST_SUITE("mime_string") { -using namespace rspamd::mime; -TEST_CASE("mime_string unfiltered ctors") +TEST_SUITE("mime_string") { - SUBCASE("empty") { - mime_string st; - CHECK(st.size() == 0); - CHECK(st == ""); - } - SUBCASE("unfiltered valid") { - mime_string st{std::string_view("abcd")}; - CHECK(st == "abcd"); - } - SUBCASE("unfiltered zero character") { - mime_string st{"abc\0d", 5}; - CHECK(st.has_zeroes()); - CHECK(st == "abcd"); - } - SUBCASE("unfiltered invalid character - middle") { - mime_string st{std::string("abc\234d")}; - CHECK(st.has_invalid()); - CHECK(st == "abc\uFFFDd"); - } - SUBCASE("unfiltered invalid character - end") { - mime_string st{std::string("abc\234")}; - CHECK(st.has_invalid()); - CHECK(st == "abc\uFFFD"); - } - SUBCASE("unfiltered invalid character - start") { - mime_string st{std::string("\234abc")}; - CHECK(st.has_invalid()); - CHECK(st == "\uFFFDabc"); - } -} - -TEST_CASE("mime_string filtered ctors") -{ - auto print_filter = [](UChar32 inp) -> UChar32 { - if (!u_isprint(inp)) { - return 0; + using namespace rspamd::mime; + TEST_CASE("mime_string unfiltered ctors") + { + SUBCASE("empty") + { + mime_string st; + CHECK(st.size() == 0); + CHECK(st == ""); + } + SUBCASE("unfiltered valid") + { + mime_string st{std::string_view("abcd")}; + CHECK(st == "abcd"); + } + SUBCASE("unfiltered zero character") + { + 
mime_string st{"abc\0d", 5}; + CHECK(st.has_zeroes()); + CHECK(st == "abcd"); + } + SUBCASE("unfiltered invalid character - middle") + { + mime_string st{std::string("abc\234d")}; + CHECK(st.has_invalid()); + CHECK(st == "abc\uFFFDd"); + } + SUBCASE("unfiltered invalid character - end") + { + mime_string st{std::string("abc\234")}; + CHECK(st.has_invalid()); + CHECK(st == "abc\uFFFD"); + } + SUBCASE("unfiltered invalid character - start") + { + mime_string st{std::string("\234abc")}; + CHECK(st.has_invalid()); + CHECK(st == "\uFFFDabc"); } - - return inp; - }; - - auto tolower_filter = [](UChar32 inp) -> UChar32 { - return u_tolower(inp); - }; - - SUBCASE("empty") { - mime_string st{std::string_view(""), tolower_filter}; - CHECK(st.size() == 0); - CHECK(st == ""); - } - SUBCASE("filtered valid") { - mime_string st{std::string("AbCdУ"), tolower_filter}; - CHECK(st == "abcdу"); - } - SUBCASE("filtered invalid + filtered") { - mime_string st{std::string("abcd\234\1"), print_filter}; - CHECK(st == "abcd\uFFFD"); } -} -TEST_CASE("mime_string assign") -{ - SUBCASE("assign from valid") { - mime_string st; - CHECK(st.assign_if_valid(std::string("test"))); - CHECK(st == "test"); + TEST_CASE("mime_string filtered ctors") + { + auto print_filter = [](UChar32 inp) -> UChar32 { + if (!u_isprint(inp)) { + return 0; + } + + return inp; + }; + + auto tolower_filter = [](UChar32 inp) -> UChar32 { + return u_tolower(inp); + }; + + SUBCASE("empty") + { + mime_string st{std::string_view(""), tolower_filter}; + CHECK(st.size() == 0); + CHECK(st == ""); + } + SUBCASE("filtered valid") + { + mime_string st{std::string("AbCdУ"), tolower_filter}; + CHECK(st == "abcdу"); + } + SUBCASE("filtered invalid + filtered") + { + mime_string st{std::string("abcd\234\1"), print_filter}; + CHECK(st == "abcd\uFFFD"); + } } - SUBCASE("assign from invalid") { - mime_string st; + TEST_CASE("mime_string assign") + { + SUBCASE("assign from valid") + { + mime_string st; + + 
CHECK(st.assign_if_valid(std::string("test"))); + CHECK(st == "test"); + } + SUBCASE("assign from invalid") + { + mime_string st; - CHECK(!st.assign_if_valid(std::string("test\234t"))); - CHECK(st == ""); + CHECK(!st.assign_if_valid(std::string("test\234t"))); + CHECK(st == ""); + } } -} -TEST_CASE("mime_string iterators") -{ + TEST_CASE("mime_string iterators") + { - SUBCASE("unfiltered iterator ascii") { - auto in = std::string("abcd"); - mime_string st{in}; - CHECK(st == "abcd"); + SUBCASE("unfiltered iterator ascii") + { + auto in = std::string("abcd"); + mime_string st{in}; + CHECK(st == "abcd"); - int i = 0; - for (auto &&c : st) { - CHECK(c == in[i++]); + int i = 0; + for (auto &&c: st) { + CHECK(c == in[i++]); + } } - } - - SUBCASE("unfiltered iterator utf8") { - auto in = std::string("тест"); - UChar32 ucs[4] = {1090, 1077, 1089, 1090}; - mime_string st{in}; - CHECK(st == "тест"); - int i = 0; - for (auto &&c : st) { - CHECK(c == ucs[i++]); + SUBCASE("unfiltered iterator utf8") + { + auto in = std::string("тест"); + UChar32 ucs[4] = {1090, 1077, 1089, 1090}; + mime_string st{in}; + CHECK(st == "тест"); + + int i = 0; + for (auto &&c: st) { + CHECK(c == ucs[i++]); + } + CHECK(i == sizeof(ucs) / sizeof(ucs[0])); } - CHECK(i == sizeof(ucs) / sizeof(ucs[0])); - } - SUBCASE("unfiltered raw iterator ascii") { - auto in = std::string("abcd"); - mime_string st{in}; - CHECK(st == "abcd"); + SUBCASE("unfiltered raw iterator ascii") + { + auto in = std::string("abcd"); + mime_string st{in}; + CHECK(st == "abcd"); - int i = 0; - for (auto it = st.raw_begin(); it != st.raw_end(); ++it) { - CHECK(*it == in[i++]); + int i = 0; + for (auto it = st.raw_begin(); it != st.raw_end(); ++it) { + CHECK(*it == in[i++]); + } } - } - - SUBCASE("unfiltered raw iterator utf8") { - auto in = std::string("тест"); - mime_string st{in}; - CHECK(st == "тест"); - int i = 0; - for (auto it = st.raw_begin(); it != st.raw_end(); ++it) { - CHECK(*it == in[i++]); + SUBCASE("unfiltered raw 
iterator utf8") + { + auto in = std::string("тест"); + mime_string st{in}; + CHECK(st == "тест"); + + int i = 0; + for (auto it = st.raw_begin(); it != st.raw_end(); ++it) { + CHECK(*it == in[i++]); + } + CHECK(i == in.size()); } - CHECK(i == in.size()); } -} }
\ No newline at end of file diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx index fbd03206a..583a2c903 100644 --- a/src/libmime/mime_string.hxx +++ b/src/libmime/mime_string.hxx @@ -39,8 +39,9 @@ namespace rspamd::mime { * Mime string iterators are always const, so the underlying storage should not * be modified externally. */ -template<class T=char, class Allocator = std::allocator<T>, - class Functor = fu2::function_view<UChar32(UChar32)>> class basic_mime_string; +template<class T = char, class Allocator = std::allocator<T>, + class Functor = fu2::function_view<UChar32(UChar32)>> +class basic_mime_string; using mime_string = basic_mime_string<char>; using mime_pool_string = basic_mime_string<char, mempool_allocator<char>>; @@ -52,27 +53,26 @@ enum class mime_string_flags : std::uint8_t { MIME_STRING_SEEN_INVALID = 0x1 << 1, }; -constexpr mime_string_flags operator |(mime_string_flags lhs, mime_string_flags rhs) +constexpr mime_string_flags operator|(mime_string_flags lhs, mime_string_flags rhs) { using ut = std::underlying_type<mime_string_flags>::type; return static_cast<mime_string_flags>(static_cast<ut>(lhs) | static_cast<ut>(rhs)); } -constexpr mime_string_flags operator &(mime_string_flags lhs, mime_string_flags rhs) +constexpr mime_string_flags operator&(mime_string_flags lhs, mime_string_flags rhs) { using ut = std::underlying_type<mime_string_flags>::type; return static_cast<mime_string_flags>(static_cast<ut>(lhs) & static_cast<ut>(rhs)); } -constexpr bool operator !(mime_string_flags fl) +constexpr bool operator!(mime_string_flags fl) { return fl == mime_string_flags::MIME_STRING_DEFAULT; } // Codepoint iterator base class template<typename Container, bool Raw = false> -struct iterator_base -{ +struct iterator_base { template<typename, typename, typename> friend class basic_mime_string; @@ -93,8 +93,10 @@ public: return idx != it.idx; } - iterator_base(difference_type index, Container *instance) noexcept: - idx(index), 
cont_instance(instance) {} + iterator_base(difference_type index, Container *instance) noexcept + : idx(index), cont_instance(instance) + { + } iterator_base() noexcept = default; iterator_base(const iterator_base &) noexcept = default; @@ -105,7 +107,8 @@ public: return cont_instance; } - codepoint_type get_value() const noexcept { + codepoint_type get_value() const noexcept + { auto i = idx; codepoint_type uc; U8_NEXT_UNSAFE(cont_instance->data(), i, uc); @@ -113,10 +116,12 @@ public: } protected: - difference_type idx; - Container* cont_instance = nullptr; + difference_type idx; + Container *cont_instance = nullptr; + protected: - void advance(difference_type n) noexcept { + void advance(difference_type n) noexcept + { if (n > 0) { U8_FWD_N_UNSAFE(cont_instance->data(), idx, n); } @@ -124,12 +129,14 @@ protected: U8_BACK_N_UNSAFE(cont_instance->data(), idx, (-n)); } } - void increment() noexcept { + void increment() noexcept + { codepoint_type uc; U8_NEXT_UNSAFE(cont_instance->data(), idx, uc); } - void decrement() noexcept { + void decrement() noexcept + { codepoint_type uc; U8_PREV_UNSAFE(cont_instance->data(), idx, uc); } @@ -137,8 +144,7 @@ protected: // Partial spec for raw Byte-based iterator base template<typename Container> -struct iterator_base<Container, true> -{ +struct iterator_base<Container, true> { template<typename, typename, typename> friend class basic_string; @@ -148,40 +154,63 @@ public: using reference_type = value_type; using iterator_category = std::bidirectional_iterator_tag; - bool operator==( const iterator_base& it ) const noexcept { return idx == it.idx; } - bool operator!=( const iterator_base& it ) const noexcept { return idx != it.idx; } + bool operator==(const iterator_base &it) const noexcept + { + return idx == it.idx; + } + bool operator!=(const iterator_base &it) const noexcept + { + return idx != it.idx; + } - iterator_base(difference_type index, Container *instance) noexcept: - idx(index), cont_instance(instance) {} + 
iterator_base(difference_type index, Container *instance) noexcept + : idx(index), cont_instance(instance) + { + } iterator_base() noexcept = default; - iterator_base( const iterator_base& ) noexcept = default; - iterator_base& operator=( const iterator_base& ) noexcept = default; - Container* get_instance() const noexcept { return cont_instance; } + iterator_base(const iterator_base &) noexcept = default; + iterator_base &operator=(const iterator_base &) noexcept = default; + Container *get_instance() const noexcept + { + return cont_instance; + } - value_type get_value() const noexcept { return cont_instance->get_storage().at(idx); } -protected: - difference_type idx; - Container* cont_instance = nullptr; + value_type get_value() const noexcept + { + return cont_instance->get_storage().at(idx); + } protected: + difference_type idx; + Container *cont_instance = nullptr; +protected: //! Advance the iterator n times (negative values allowed!) - void advance( difference_type n ) noexcept { + void advance(difference_type n) noexcept + { idx += n; } - void increment() noexcept { idx ++; } - void decrement() noexcept { idx --; } + void increment() noexcept + { + idx++; + } + void decrement() noexcept + { + idx--; + } }; -template<typename Container, bool Raw> struct iterator; -template<typename Container, bool Raw> struct const_iterator; +template<typename Container, bool Raw> +struct iterator; +template<typename Container, bool Raw> +struct const_iterator; template<typename Container, bool Raw = false> struct iterator : iterator_base<Container, Raw> { - iterator(typename iterator_base<Container, Raw>::difference_type index, Container *instance) noexcept: - iterator_base<Container, Raw>(index, instance) + iterator(typename iterator_base<Container, Raw>::difference_type index, Container *instance) noexcept + : iterator_base<Container, Raw>(index, instance) { } iterator() noexcept = default; @@ -265,26 +294,39 @@ public: using iterator = 
rspamd::mime::iterator<basic_mime_string, false>; using raw_iterator = rspamd::mime::iterator<basic_mime_string, true>; /* Ctors */ - basic_mime_string() noexcept : Allocator() {} - explicit basic_mime_string(const Allocator& alloc) noexcept : Allocator(alloc) {} - explicit basic_mime_string(filter_type &&filt, const Allocator& alloc = Allocator()) noexcept : - Allocator(alloc), filter_func(std::move(filt)) {} + basic_mime_string() noexcept + : Allocator() + { + } + explicit basic_mime_string(const Allocator &alloc) noexcept + : Allocator(alloc) + { + } + explicit basic_mime_string(filter_type &&filt, const Allocator &alloc = Allocator()) noexcept + : Allocator(alloc), filter_func(std::move(filt)) + { + } - basic_mime_string(const CharT* str, std::size_t sz, const Allocator& alloc = Allocator()) noexcept : - Allocator(alloc) + basic_mime_string(const CharT *str, std::size_t sz, const Allocator &alloc = Allocator()) noexcept + : Allocator(alloc) { append_c_string_unfiltered(str, sz); } basic_mime_string(const storage_type &st, - const Allocator& alloc = Allocator()) noexcept : - basic_mime_string(st.data(), st.size(), alloc) {} + const Allocator &alloc = Allocator()) noexcept + : basic_mime_string(st.data(), st.size(), alloc) + { + } basic_mime_string(const view_type &st, - const Allocator& alloc = Allocator()) noexcept : - basic_mime_string(st.data(), st.size(), alloc) {} + const Allocator &alloc = Allocator()) noexcept + : basic_mime_string(st.data(), st.size(), alloc) + { + } /* Explicit move ctor */ - basic_mime_string(basic_mime_string &&other) noexcept { + basic_mime_string(basic_mime_string &&other) noexcept + { *this = std::move(other); } @@ -297,45 +339,54 @@ public: * @param filt * @param alloc */ - basic_mime_string(const CharT* str, std::size_t sz, + basic_mime_string(const CharT *str, std::size_t sz, filter_type &&filt, - const Allocator& alloc = Allocator()) noexcept : - Allocator(alloc), - filter_func(std::move(filt)) + const Allocator &alloc = 
Allocator()) noexcept + : Allocator(alloc), + filter_func(std::move(filt)) { append_c_string_filtered(str, sz); } basic_mime_string(const storage_type &st, filter_type &&filt, - const Allocator& alloc = Allocator()) noexcept : - basic_mime_string(st.data(), st.size(), std::move(filt), alloc) {} + const Allocator &alloc = Allocator()) noexcept + : basic_mime_string(st.data(), st.size(), std::move(filt), alloc) + { + } basic_mime_string(const view_type &st, filter_type &&filt, - const Allocator& alloc = Allocator()) noexcept : - basic_mime_string(st.data(), st.size(), std::move(filt), alloc) {} + const Allocator &alloc = Allocator()) noexcept + : basic_mime_string(st.data(), st.size(), std::move(filt), alloc) + { + } /* It seems some libc++ implementations still perform copy, this might fix them */ - basic_mime_string& operator=(basic_mime_string &&other) { + basic_mime_string &operator=(basic_mime_string &&other) + { storage = std::move(other.storage); filter_func = std::move(other.filter_func); return *this; } - constexpr auto size() const noexcept -> std::size_t { + constexpr auto size() const noexcept -> std::size_t + { return storage.size(); } - constexpr auto data() const noexcept -> const CharT* { + constexpr auto data() const noexcept -> const CharT * + { return storage.data(); } - constexpr auto has_zeroes() const noexcept -> bool { + constexpr auto has_zeroes() const noexcept -> bool + { return !!(flags & mime_string_flags::MIME_STRING_SEEN_ZEROES); } - constexpr auto has_invalid() const noexcept -> bool { + constexpr auto has_invalid() const noexcept -> bool + { return !!(flags & mime_string_flags::MIME_STRING_SEEN_INVALID); } @@ -347,12 +398,13 @@ public: * @param other * @return */ - [[nodiscard]] auto assign_if_valid(storage_type &&other) -> bool { + [[nodiscard]] auto assign_if_valid(storage_type &&other) -> bool + { if (filter_func) { /* No way */ return false; } - if (rspamd_fast_utf8_validate((const unsigned char *)other.data(), other.size()) == 0) 
{ + if (rspamd_fast_utf8_validate((const unsigned char *) other.data(), other.size()) == 0) { std::swap(storage, other); return true; @@ -366,7 +418,8 @@ public: * @param other * @return */ - auto assign_copy(const view_type &other) { + auto assign_copy(const view_type &other) + { storage.clear(); if (filter_func) { @@ -376,7 +429,8 @@ public: append_c_string_unfiltered(other.data(), other.size()); } } - auto assign_copy(const storage_type &other) { + auto assign_copy(const storage_type &other) + { storage.clear(); if (filter_func) { @@ -386,7 +440,8 @@ public: append_c_string_unfiltered(other.data(), other.size()); } } - auto assign_copy(const basic_mime_string &other) { + auto assign_copy(const basic_mime_string &other) + { storage.clear(); if (filter_func) { @@ -398,7 +453,8 @@ public: } /* Mutators */ - auto append(const CharT* str, std::size_t size) -> std::size_t { + auto append(const CharT *str, std::size_t size) -> std::size_t + { if (filter_func) { return append_c_string_filtered(str, size); } @@ -406,47 +462,54 @@ public: return append_c_string_unfiltered(str, size); } } - auto append(const storage_type &other) -> std::size_t { + auto append(const storage_type &other) -> std::size_t + { return append(other.data(), other.size()); } - auto append(const view_type &other) -> std::size_t { + auto append(const view_type &other) -> std::size_t + { return append(other.data(), other.size()); } auto ltrim(const view_type &what) -> void { auto it = std::find_if(storage.begin(), storage.end(), - [&what](CharT c) { - return !std::any_of(what.begin(), what.end(), [&c](CharT sc) { return sc == c; }); - }); + [&what](CharT c) { + return !std::any_of(what.begin(), what.end(), [&c](CharT sc) { return sc == c; }); + }); storage.erase(storage.begin(), it); } auto rtrim(const view_type &what) -> void { auto it = std::find_if(storage.rbegin(), storage.rend(), - [&what](CharT c) { - return !std::any_of(what.begin(), what.end(), [&c](CharT sc) { return sc == c; }); - }); + 
[&what](CharT c) { + return !std::any_of(what.begin(), what.end(), [&c](CharT sc) { return sc == c; }); + }); storage.erase(it.base(), storage.end()); } - auto trim(const view_type &what) -> void { + auto trim(const view_type &what) -> void + { ltrim(what); rtrim(what); } /* Comparison */ - auto operator ==(const basic_mime_string &other) { + auto operator==(const basic_mime_string &other) + { return other.storage == storage; } - auto operator ==(const storage_type &other) { + auto operator==(const storage_type &other) + { return other == storage; } - auto operator ==(const view_type &other) { + auto operator==(const view_type &other) + { return other == storage; } - auto operator ==(const CharT* other) { + auto operator==(const CharT *other) + { if (other == NULL) { return false; } @@ -485,36 +548,43 @@ public: return storage; } - inline auto as_view() const noexcept -> view_type { + inline auto as_view() const noexcept -> view_type + { return view_type{storage}; } - constexpr CharT operator[](std::size_t pos) const noexcept { + constexpr CharT operator[](std::size_t pos) const noexcept + { return storage[pos]; } - constexpr CharT at(std::size_t pos) const { + constexpr CharT at(std::size_t pos) const + { return storage.at(pos); } - constexpr bool empty() const noexcept { + constexpr bool empty() const noexcept + { return storage.empty(); } /* For doctest stringify */ - friend std::ostream& operator<< (std::ostream& os, const CharT& value) { + friend std::ostream &operator<<(std::ostream &os, const CharT &value) + { os << value.storage; return os; } + private: mime_string_flags flags = mime_string_flags::MIME_STRING_DEFAULT; storage_type storage; filter_type filter_func; - auto append_c_string_unfiltered(const CharT* str, std::size_t len) -> std::size_t { + auto append_c_string_unfiltered(const CharT *str, std::size_t len) -> std::size_t + { /* This is fast path */ const auto *p = str; const auto *end = str + len; - std::int32_t err_offset; // We have to use 
int32_t here as old libicu is brain-damaged + std::int32_t err_offset;// We have to use int32_t here as old libicu is brain-damaged auto orig_size = storage.size(); storage.reserve(len + storage.size()); @@ -526,7 +596,7 @@ private: } while (p < end && len > 0 && - (err_offset = rspamd_fast_utf8_validate((const unsigned char *)p, len)) > 0) { + (err_offset = rspamd_fast_utf8_validate((const unsigned char *) p, len)) > 0) { auto cur_offset = err_offset - 1; storage.append(p, cur_offset); @@ -554,8 +624,9 @@ private: return storage.size() - orig_size; } - auto append_c_string_filtered(const CharT* str, std::size_t len) -> std::size_t { - std::int32_t i = 0; // We have to use int32_t here as old libicu is brain-damaged + auto append_c_string_filtered(const CharT *str, std::size_t len) -> std::size_t + { + std::int32_t i = 0;// We have to use int32_t here as old libicu is brain-damaged UChar32 uc; char tmp[4]; auto orig_size = storage.size(); @@ -592,7 +663,7 @@ private: } }; -} +}// namespace rspamd::mime -#endif //RSPAMD_MIME_STRING_HXX +#endif//RSPAMD_MIME_STRING_HXX diff --git a/src/libmime/received.cxx b/src/libmime/received.cxx index 8e0609f39..dc16d9b09 100644 --- a/src/libmime/received.cxx +++ b/src/libmime/received.cxx @@ -43,8 +43,10 @@ struct received_part { std::vector<mime_string> comments; explicit received_part(received_part_type t) - : type(t), - data(received_char_filter) {} + : type(t), + data(received_char_filter) + { + } }; static inline auto @@ -74,7 +76,8 @@ received_process_part(const std::string_view &data, read_data, read_tcpinfo, all_done - } state, next_state; + } state, + next_state; /* In this function, we just process comments and data separately */ const auto *p = data.data(); @@ -108,7 +111,7 @@ received_process_part(const std::string_view &data, npart.comments.emplace_back(received_char_filter); auto &comment = npart.comments.back(); received_part_set_or_append(c, p - c, - comment); + comment); } } @@ -128,7 +131,7 @@ 
received_process_part(const std::string_view &data, if (p > c) { if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { received_part_set_or_append(c, p - c, - npart.data); + npart.data); } } @@ -138,11 +141,11 @@ received_process_part(const std::string_view &data, p++; c = p; } - else if (g_ascii_isspace (*p)) { + else if (g_ascii_isspace(*p)) { if (p > c) { if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { received_part_set_or_append(c, p - c, - npart.data); + npart.data); } } @@ -155,7 +158,7 @@ received_process_part(const std::string_view &data, if (p > c) { if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { received_part_set_or_append(c, p - c, - npart.data); + npart.data); } } @@ -187,7 +190,7 @@ received_process_part(const std::string_view &data, case read_tcpinfo: if (*p == ']') { received_part_set_or_append(c, p - c + 1, - npart.data); + npart.data); seen_tcpinfo = TRUE; state = skip_spaces; next_state = read_data; @@ -214,7 +217,7 @@ received_process_part(const std::string_view &data, if (p > c) { if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) { received_part_set_or_append(c, p - c, - npart.data); + npart.data); } last = p - data.data(); @@ -235,11 +238,11 @@ received_process_part(const std::string_view &data, return false; } -template <std::size_t N> +template<std::size_t N> constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool { - for (auto i = 0; i < N; i ++) { - if (lc_map[(unsigned char)in[i]] != lit[i]) { + for (auto i = 0; i < N; i++) { + if (lc_map[(unsigned char) in[i]] != lit[i]) { return false; } } @@ -259,7 +262,7 @@ received_spill(const std::string_view &in, const auto *end = p + in.size(); auto skip_spaces = [&p, end]() { - while (p < end && g_ascii_isspace (*p)) { + while (p < end && g_ascii_isspace(*p)) { p++; } }; @@ -272,13 +275,13 @@ received_spill(const std::string_view &in, while (p < end) { if (*p == ')') { - ebraces ++; + ebraces++; } else if (*p == '(') { 
- obraces ++; + obraces++; } - p ++; + p++; if (obraces == ebraces) { /* Skip spaces after */ @@ -317,7 +320,7 @@ received_spill(const std::string_view &in, return {}; } - g_assert (pos != 0); + g_assert(pos != 0); p += pos; len = end > p ? end - p : 0; seen_from = true; @@ -330,7 +333,7 @@ received_spill(const std::string_view &in, return {}; } - g_assert (pos != 0); + g_assert(pos != 0); p += pos; len = end > p ? end - p : 0; seen_by = true; @@ -364,7 +367,7 @@ received_spill(const std::string_view &in, } else { while (p < end) { - if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) { + if (!(g_ascii_isspace(*p) || *p == '(' || *p == ';')) { p++; } else { @@ -389,7 +392,7 @@ received_spill(const std::string_view &in, len = end > p ? end - p : 0; } else { - g_assert (pos != 0); + g_assert(pos != 0); p += pos; len = end > p ? end - p : 0; } @@ -400,7 +403,7 @@ received_spill(const std::string_view &in, } #define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \ - (rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX) + (rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE | RSPAMD_INET_ADDRESS_PARSE_NO_UNIX) static auto received_process_rdns(rspamd_mempool_t *pool, @@ -419,9 +422,9 @@ received_process_rdns(rspamd_mempool_t *pool, if (*p == '[' && *(end - 1) == ']' && in.size() > 2) { /* We have enclosed ip address */ auto *addr = rspamd_parse_inet_address_pool(p + 1, - (end - p) - 2, - pool, - RSPAMD_INET_ADDRESS_PARSE_RECEIVED); + (end - p) - 2, + pool, + RSPAMD_INET_ADDRESS_PARSE_RECEIVED); if (addr) { const gchar *addr_str; @@ -488,9 +491,9 @@ received_process_host_tcpinfo(rspamd_mempool_t *pool, if (brace_pos != std::string_view::npos) { auto substr_addr = in.substr(1, brace_pos - 1); addr = rspamd_parse_inet_address_pool(substr_addr.data(), - substr_addr.size(), - pool, - RSPAMD_INET_ADDRESS_PARSE_RECEIVED); + substr_addr.size(), + pool, + RSPAMD_INET_ADDRESS_PARSE_RECEIVED); if (addr) { rh.addr = addr; @@ 
-502,7 +505,7 @@ received_process_host_tcpinfo(rspamd_mempool_t *pool, if (g_ascii_isxdigit(in[0])) { /* Try to parse IP address */ addr = rspamd_parse_inet_address_pool(in.data(), - in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED); + in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED); if (addr) { rh.addr = addr; rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr))); @@ -518,11 +521,11 @@ received_process_host_tcpinfo(rspamd_mempool_t *pool, if (ebrace_pos != std::string_view::npos && ebrace_pos > obrace_pos) { auto substr_addr = in.substr(obrace_pos + 1, - ebrace_pos - obrace_pos - 1); + ebrace_pos - obrace_pos - 1); addr = rspamd_parse_inet_address_pool(substr_addr.data(), - substr_addr.size(), - pool, - RSPAMD_INET_ADDRESS_PARSE_RECEIVED); + substr_addr.size(), + pool, + RSPAMD_INET_ADDRESS_PARSE_RECEIVED); if (addr) { rh.addr = addr; @@ -531,7 +534,7 @@ received_process_host_tcpinfo(rspamd_mempool_t *pool, /* Process with rDNS */ auto rdns_substr = in.substr(0, obrace_pos); - if (received_process_rdns(pool,rdns_substr,rh.real_hostname)) { + if (received_process_rdns(pool, rdns_substr, rh.real_hostname)) { ret = true; } } @@ -568,8 +571,8 @@ received_process_from(rspamd_mempool_t *pool, if (!rpart.comments.empty()) { /* We can have info within comment as part of RFC */ received_process_host_tcpinfo( - pool, rh, - rpart.comments[0].as_view()); + pool, rh, + rpart.comments[0].as_view()); } if (rh.real_ip.size() == 0) { @@ -585,12 +588,12 @@ received_process_from(rspamd_mempool_t *pool, if (rh.real_ip.size() != 0) { /* Get announced hostname (usually helo) */ received_process_rdns(pool, - rpart.data.as_view(), - rh.from_hostname); + rpart.data.as_view(), + rh.from_hostname); } else { received_process_host_tcpinfo(pool, - rh, rpart.data.as_view()); + rh, rpart.data.as_view()); } } } @@ -598,8 +601,8 @@ received_process_from(rspamd_mempool_t *pool, /* rpart->dlen = 0 */ if (!rpart.comments.empty()) { received_process_host_tcpinfo( - 
pool, rh, - rpart.comments[0].as_view()); + pool, rh, + rpart.comments[0].as_view()); } } } @@ -611,25 +614,23 @@ received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool, { std::ptrdiff_t date_pos = -1; - static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({ - {"smtp", received_flags::SMTP}, - {"esmtp", received_flags::ESMTP}, - {"esmtpa", received_flags::ESMTPA | - received_flags::AUTHENTICATED}, - {"esmtpsa", received_flags::ESMTPSA | - received_flags::SSL | - received_flags::AUTHENTICATED}, - {"esmtps", received_flags::ESMTPS | - received_flags::SSL}, - {"lmtp", received_flags::LMTP}, - {"imap", received_flags::IMAP}, - {"imaps", received_flags::IMAP | - received_flags::SSL}, - {"http", received_flags::HTTP}, - {"https", received_flags::HTTP | - received_flags::SSL}, - {"local", received_flags::LOCAL} - }); + static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({{"smtp", received_flags::SMTP}, + {"esmtp", received_flags::ESMTP}, + {"esmtpa", received_flags::ESMTPA | + received_flags::AUTHENTICATED}, + {"esmtpsa", received_flags::ESMTPSA | + received_flags::SSL | + received_flags::AUTHENTICATED}, + {"esmtps", received_flags::ESMTPS | + received_flags::SSL}, + {"lmtp", received_flags::LMTP}, + {"imap", received_flags::IMAP}, + {"imaps", received_flags::IMAP | + received_flags::SSL}, + {"http", received_flags::HTTP}, + {"https", received_flags::HTTP | + received_flags::SSL}, + {"local", received_flags::LOCAL}}); auto parts = received_spill(in, date_pos); @@ -642,15 +643,15 @@ received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool, rh.flags = received_flags::UNKNOWN; rh.hdr = hdr; - for (const auto &part : parts) { + for (const auto &part: parts) { switch (part.type) { case received_part_type::RSPAMD_RECEIVED_PART_FROM: received_process_from(pool, part, rh); break; case received_part_type::RSPAMD_RECEIVED_PART_BY: 
received_process_rdns(pool, - part.data.as_view(), - rh.by_hostname); + part.data.as_view(), + rh.by_hostname); break; case received_part_type::RSPAMD_RECEIVED_PART_WITH: if (part.data.size() > 0) { @@ -664,7 +665,7 @@ received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool, case received_part_type::RSPAMD_RECEIVED_PART_FOR: rh.for_mbox.assign_copy(part.data); rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(), - rh.for_mbox.size()); + rh.for_mbox.size()); break; default: /* Do nothing */ @@ -678,8 +679,8 @@ received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool, if (date_pos > 0 && date_pos < in.size()) { auto date_sub = in.substr(date_pos); - rh.timestamp = rspamd_parse_smtp_date((const unsigned char*)date_sub.data(), - date_sub.size(), nullptr); + rh.timestamp = rspamd_parse_smtp_date((const unsigned char *) date_sub.data(), + date_sub.size(), nullptr); } return true; @@ -713,10 +714,9 @@ received_maybe_fix_task(struct rspamd_task *task) -> bool } } - if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP) - && task->from_addr) { - msg_debug_task ("the first received seems to be" - " not ours, prepend it with fake one"); + if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) { + msg_debug_task("the first received seems to be" + " not ours, prepend it with fake one"); auto &trecv = recv_chain_ptr->new_received(received_header_chain::append_type::append_head); trecv.flags |= received_flags::ARTIFICIAL; @@ -731,14 +731,14 @@ received_maybe_fix_task(struct rspamd_task *task) -> bool trecv.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr))); - const auto *mta_name = (const char*)rspamd_mempool_get_variable(task->task_pool, - RSPAMD_MEMPOOL_MTA_NAME); + const auto *mta_name = (const char *) rspamd_mempool_get_variable(task->task_pool, + RSPAMD_MEMPOOL_MTA_NAME); if (mta_name) { trecv.by_hostname.assign_copy(std::string_view(mta_name)); } 
trecv.addr = rspamd_inet_address_copy(task->from_addr, - task->task_pool); + task->task_pool); if (task->hostname) { trecv.real_hostname.assign_copy(std::string_view(task->hostname)); @@ -752,12 +752,12 @@ received_maybe_fix_task(struct rspamd_task *task) -> bool if (!need_recv_correction && (task->flags & RSPAMD_TASK_FLAG_NO_IP) && (task->cfg && !task->cfg->ignore_received)) { if (!top_recv.real_ip.empty()) { - if (!rspamd_parse_inet_address (&task->from_addr, - top_recv.real_ip.data(), - top_recv.real_ip.size(), - RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) { - msg_warn_task ("cannot get IP from received header: '%s'", - top_recv.real_ip.data()); + if (!rspamd_parse_inet_address(&task->from_addr, + top_recv.real_ip.data(), + top_recv.real_ip.size(), + RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) { + msg_warn_task("cannot get IP from received header: '%s'", + top_recv.real_ip.data()); task->from_addr = nullptr; } } @@ -789,8 +789,8 @@ received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool auto i = 1; - for (const auto &rh : chain->as_vector()) { - lua_createtable (L, 0, 10); + for (const auto &rh: chain->as_vector()) { + lua_createtable(L, 0, 10); if (rh.hdr && rh.hdr->decoded) { rspamd_lua_table_set(L, "raw", rh.hdr->decoded); @@ -838,57 +838,51 @@ received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool return true; } -} // namespace rspamd::mime +}// namespace rspamd::mime -bool -rspamd_received_header_parse(struct rspamd_task *task, - const char *data, size_t sz, - struct rspamd_mime_header *hdr) +bool rspamd_received_header_parse(struct rspamd_task *task, + const char *data, size_t sz, + struct rspamd_mime_header *hdr) { - auto *recv_chain_ptr = static_cast<rspamd::mime::received_header_chain *> - (MESSAGE_FIELD(task, received_headers)); + auto *recv_chain_ptr = static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)); if (recv_chain_ptr == nullptr) { /* This constructor automatically registers dtor in 
mempool */ recv_chain_ptr = new rspamd::mime::received_header_chain(task); - MESSAGE_FIELD(task, received_headers) = (void *)recv_chain_ptr; + MESSAGE_FIELD(task, received_headers) = (void *) recv_chain_ptr; } return rspamd::mime::received_header_parse(*recv_chain_ptr, task->task_pool, - std::string_view{data, sz}, hdr); + std::string_view{data, sz}, hdr); } -bool -rspamd_received_maybe_fix_task(struct rspamd_task *task) +bool rspamd_received_maybe_fix_task(struct rspamd_task *task) { return rspamd::mime::received_maybe_fix_task(task); } -bool -rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L) +bool rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L) { return rspamd::mime::received_export_to_lua( - static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)), - L); + static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)), + L); } /* Tests part */ #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL #include "doctest/doctest.h" -TEST_SUITE("received") { -TEST_CASE("parse received") +TEST_SUITE("received") { - using namespace std::string_view_literals; - using map_type = ankerl::unordered_dense::map<std::string_view, std::string_view>; - std::vector<std::pair<std::string_view, map_type>> cases{ + TEST_CASE("parse received") + { + using namespace std::string_view_literals; + using map_type = ankerl::unordered_dense::map<std::string_view, std::string_view>; + std::vector<std::pair<std::string_view, map_type>> cases{ // Simple received {"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv, - { - {"real_ip", "185.243.30.90"}, - {"real_hostname", "smtp11.mailtrack.pl"}, - {"from_hostname", "smtp11.mailtrack.pl"} - } - }, + {{"real_ip", "185.243.30.90"}, + {"real_hostname", "smtp11.mailtrack.pl"}, + {"from_hostname", "smtp11.mailtrack.pl"}}}, // Real Postfix IPv6 received {"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n" 
"\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n" @@ -896,26 +890,22 @@ TEST_CASE("parse received") "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n" "\tfor <test@example.com>; Mon, 6 Jul 2015 09:01:20 +0000 (UTC)\n" "\t(envelope-from upwest201diana@outlook.com)"sv, - { - {"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"}, - {"from_hostname", "server.chat-met-vreemden.nl"}, - {"by_hostname", "mx1.freebsd.org"}, - {"for_mbox", "<test@example.com>"} - } - }, + {{"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"}, + {"from_hostname", "server.chat-met-vreemden.nl"}, + {"by_hostname", "mx1.freebsd.org"}, + {"for_mbox", "<test@example.com>"}}}, // Exim IPv4 received {"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n" " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n" " (envelope-from <exim-dev-bounces@exim.org>)\n" " id 1fZ55o-0006DP-3H\n" " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv, - { - {"from_hostname", "localhost"}, - {"real_ip", "127.0.0.1"}, - {"for_mbox", "<xxx@xxx.xxx>"}, - {"by_hostname", "hummus.csx.cam.ac.uk"}, - } - }, + { + {"from_hostname", "localhost"}, + {"real_ip", "127.0.0.1"}, + {"for_mbox", "<xxx@xxx.xxx>"}, + {"by_hostname", "hummus.csx.cam.ac.uk"}, + }}, // Exim IPv6 received {"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n" " helo=mx.spodhuis.org)\n" @@ -923,116 +913,105 @@ TEST_CASE("parse received") " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n" " id 1fZ55k-0006CO-9M\n" " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv, - { - {"from_hostname", "smtp.spodhuis.org"}, - {"real_ip", "2a02:898:31:0:48:4558:736d:7470"}, - {"for_mbox", "exim-dev@exim.org"}, - {"by_hostname", "hummus.csx.cam.ac.uk"}, - } - }, + { + {"from_hostname", "smtp.spodhuis.org"}, + {"real_ip", "2a02:898:31:0:48:4558:736d:7470"}, + {"for_mbox", "exim-dev@exim.org"}, + {"by_hostname", "hummus.csx.cam.ac.uk"}, + }}, // Haraka received {"from aaa.cn ([1.1.1.1]) by 
localhost.localdomain (Haraka/2.8.18) with " "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> " "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv, - { - {"from_hostname", "aaa.cn"}, - {"real_ip", "1.1.1.1"}, - {"by_hostname", "localhost.localdomain"}, - } - }, + { + {"from_hostname", "aaa.cn"}, + {"real_ip", "1.1.1.1"}, + {"by_hostname", "localhost.localdomain"}, + }}, // Invalid by {"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) " "by guovswzqkvry051@sohu.com with gg login " "by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300"sv, - { - {"from_hostname", "192.83.172.101"}, - {"real_ip", "192.83.172.101"}, - } - }, + { + {"from_hostname", "192.83.172.101"}, + {"real_ip", "192.83.172.101"}, + }}, // Invalid hostinfo {"from example.com ([]) by example.com with ESMTP id 2019091111 ;" " Thu, 26 Sep 2019 11:19:07 +0200"sv, - { - {"by_hostname", "example.com"}, - {"from_hostname", "example.com"}, - {"real_hostname", "example.com"}, - } - }, + { + {"by_hostname", "example.com"}, + {"from_hostname", "example.com"}, + {"real_hostname", "example.com"}, + }}, // Different real and announced hostnames + broken crap {"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) " "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 " ": <g @yi.br>"sv, - { - {"real_ip", "1.1.1.1"}, - {"from_hostname", "171-29.br"}, - {"real_hostname", "1-1-1-1.z.com.br"}, - {"by_hostname", "x.com.br"}, - } - }, + { + {"real_ip", "1.1.1.1"}, + {"from_hostname", "171-29.br"}, + {"real_hostname", "1-1-1-1.z.com.br"}, + {"by_hostname", "x.com.br"}, + }}, // Different real and announced ips + no hostname {"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv, - { - {"real_ip", "127.0.0.2"}, - {"from_hostname", "127.0.0.1"}, - {"by_hostname", "smtp.gmail.com"}, - } - }, + { + {"real_ip", "127.0.0.2"}, + {"from_hostname", "127.0.0.1"}, + {"by_hostname", "smtp.gmail.com"}, + 
}}, // Different real and hostanes {"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) " "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv, - { - {"real_ip", "185.118.166.127"}, - {"from_hostname", "185.118.166.127"}, - {"real_hostname", "steven2.zhou01.pserver.ru"}, - {"by_hostname", "mail.832zsu.cn"}, - } - }, + { + {"real_ip", "185.118.166.127"}, + {"from_hostname", "185.118.166.127"}, + {"real_hostname", "steven2.zhou01.pserver.ru"}, + {"by_hostname", "mail.832zsu.cn"}, + }}, // \0 in received must be filtered {"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv, - { - {"real_ip", "185.243.30.90"}, - {"real_hostname", "smtp11.mailtrack.pl"}, - {"from_hostname", "smtp11.mailtrack.pl"} - } - }, + {{"real_ip", "185.243.30.90"}, + {"real_hostname", "smtp11.mailtrack.pl"}, + {"from_hostname", "smtp11.mailtrack.pl"}}}, // No from part {"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv, - { - {"by_hostname", "mail.832zsu.cn"}, - } - }, + { + {"by_hostname", "mail.832zsu.cn"}, + }}, // From part is in the comment {"(from asterisk@localhost)\n" " by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n" " Thu, 6 Aug 2020 11:50:04 -0500"sv, - { - {"by_hostname", "pbx.xxx.com"}, - } - }, - }; - rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0); - - for (auto &&c : cases) { - SUBCASE(c.first.data()) { - rspamd::mime::received_header_chain chain; - auto ret = rspamd::mime::received_header_parse(chain, pool, - c.first, nullptr); - CHECK(ret == true); - auto &&rh = chain.get_received(0); - CHECK(rh.has_value()); - auto res = rh.value().get().as_map(); - - for (const auto &expected : c.second) { - CHECK_MESSAGE(res.contains(expected.first), expected.first.data()); - CHECK(res[expected.first] == expected.second); - } - for (const auto &existing : res) { - CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data()); - CHECK(c.second[existing.first] == existing.second); + { + {"by_hostname", 
"pbx.xxx.com"}, + }}, + }; + rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0); + + for (auto &&c: cases) { + SUBCASE(c.first.data()) + { + rspamd::mime::received_header_chain chain; + auto ret = rspamd::mime::received_header_parse(chain, pool, + c.first, nullptr); + CHECK(ret == true); + auto &&rh = chain.get_received(0); + CHECK(rh.has_value()); + auto res = rh.value().get().as_map(); + + for (const auto &expected: c.second) { + CHECK_MESSAGE(res.contains(expected.first), expected.first.data()); + CHECK(res[expected.first] == expected.second); + } + for (const auto &existing: res) { + CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data()); + CHECK(c.second[existing.first] == existing.second); + } } } - } - rspamd_mempool_delete(pool); -} + rspamd_mempool_delete(pool); + } }
\ No newline at end of file diff --git a/src/libmime/received.h b/src/libmime/received.h index 14f9f848b..46608a39a 100644 --- a/src/libmime/received.h +++ b/src/libmime/received.h @@ -21,7 +21,7 @@ #include "config.h" #include "libutil/addr.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif /* @@ -41,7 +41,7 @@ struct rspamd_mime_header; * @return */ bool rspamd_received_header_parse(struct rspamd_task *task, - const char *data, size_t sz, struct rspamd_mime_header *hdr); + const char *data, size_t sz, struct rspamd_mime_header *hdr); /** @@ -60,9 +60,9 @@ struct lua_State; */ bool rspamd_received_export_to_lua(struct rspamd_task *task, struct lua_State *L); -#ifdef __cplusplus +#ifdef __cplusplus } #endif -#endif //RSPAMD_RECEIVED_H +#endif//RSPAMD_RECEIVED_H diff --git a/src/libmime/received.hxx b/src/libmime/received.hxx index a5d5e3fe6..4f423f1a3 100644 --- a/src/libmime/received.hxx +++ b/src/libmime/received.hxx @@ -60,44 +60,46 @@ enum class received_flags { AUTHENTICATED = (1u << 13u), }; -constexpr received_flags operator |(received_flags lhs, received_flags rhs) +constexpr received_flags operator|(received_flags lhs, received_flags rhs) { using ut = std::underlying_type<received_flags>::type; return static_cast<received_flags>(static_cast<ut>(lhs) | static_cast<ut>(rhs)); } -constexpr received_flags operator |=(received_flags &lhs, const received_flags rhs) +constexpr received_flags operator|=(received_flags &lhs, const received_flags rhs) { using ut = std::underlying_type<received_flags>::type; lhs = static_cast<received_flags>(static_cast<ut>(lhs) | static_cast<ut>(rhs)); return lhs; } -constexpr received_flags operator &(received_flags lhs, received_flags rhs) +constexpr received_flags operator&(received_flags lhs, received_flags rhs) { using ut = std::underlying_type<received_flags>::type; return static_cast<received_flags>(static_cast<ut>(lhs) & static_cast<ut>(rhs)); } -constexpr bool operator !(received_flags fl) +constexpr bool 
operator!(received_flags fl) { return fl == received_flags::DEFAULT; } -constexpr received_flags received_type_apply_protocols_mask(received_flags fl) { - return fl & (received_flags::SMTP| - received_flags::ESMTP| - received_flags::ESMTPA| - received_flags::ESMTPS| - received_flags::ESMTPSA| - received_flags::IMAP| - received_flags::HTTP| - received_flags::LOCAL| - received_flags::MAPI| - received_flags::LMTP); +constexpr received_flags received_type_apply_protocols_mask(received_flags fl) +{ + return fl & (received_flags::SMTP | + received_flags::ESMTP | + received_flags::ESMTPA | + received_flags::ESMTPS | + received_flags::ESMTPSA | + received_flags::IMAP | + received_flags::HTTP | + received_flags::LOCAL | + received_flags::MAPI | + received_flags::LMTP); } -constexpr const char *received_protocol_to_string(received_flags fl) { +constexpr const char *received_protocol_to_string(received_flags fl) +{ const auto *proto = "unknown"; switch (received_type_apply_protocols_mask(fl)) { @@ -151,18 +153,22 @@ struct received_header { received_flags flags = received_flags::DEFAULT; /* See enum rspamd_received_type */ received_header() noexcept - : from_hostname(received_char_filter), - real_hostname(received_char_filter), - real_ip(received_char_filter), - by_hostname(received_char_filter), - for_mbox() {} + : from_hostname(received_char_filter), + real_hostname(received_char_filter), + real_ip(received_char_filter), + by_hostname(received_char_filter), + for_mbox() + { + } /* We have raw C pointers, so copy is explicitly disabled */ received_header(const received_header &other) = delete; - received_header(received_header &&other) noexcept { + received_header(received_header &&other) noexcept + { *this = std::move(other); } - received_header& operator=(received_header &&other) noexcept { + received_header &operator=(received_header &&other) noexcept + { if (this != &other) { from_hostname = std::move(other.from_hostname); real_hostname = std::move(other.real_hostname); 
@@ -179,7 +185,8 @@ struct received_header { } /* Unit tests helper */ - static auto from_map(const ankerl::unordered_dense::map<std::string_view, std::string_view> &map) -> received_header { + static auto from_map(const ankerl::unordered_dense::map<std::string_view, std::string_view> &map) -> received_header + { using namespace std::string_view_literals; received_header rh; @@ -225,7 +232,8 @@ struct received_header { return map; } - ~received_header() { + ~received_header() + { if (for_addr) { rspamd_email_address_free(for_addr); } @@ -234,12 +242,14 @@ struct received_header { class received_header_chain { public: - explicit received_header_chain(struct rspamd_task *task) { + explicit received_header_chain(struct rspamd_task *task) + { headers.reserve(2); rspamd_mempool_add_destructor(task->task_pool, - received_header_chain::received_header_chain_pool_dtor, this); + received_header_chain::received_header_chain_pool_dtor, this); } - explicit received_header_chain() { + explicit received_header_chain() + { headers.reserve(2); } @@ -248,7 +258,8 @@ public: append_head }; - auto new_received(append_type how = append_type::append_tail) -> received_header & { + auto new_received(append_type how = append_type::append_tail) -> received_header & + { if (how == append_type::append_tail) { headers.emplace_back(); @@ -260,7 +271,8 @@ public: return headers.front(); } } - auto new_received(received_header &&hdr, append_type how = append_type::append_tail) -> received_header & { + auto new_received(received_header &&hdr, append_type how = append_type::append_tail) -> received_header & + { if (how == append_type::append_tail) { headers.emplace_back(std::move(hdr)); @@ -272,26 +284,31 @@ public: return headers.front(); } } - auto get_received(std::size_t nth) -> std::optional<std::reference_wrapper<received_header>>{ + auto get_received(std::size_t nth) -> std::optional<std::reference_wrapper<received_header>> + { if (nth < headers.size()) { return headers[nth]; } return 
std::nullopt; } - auto size() const -> std::size_t { + auto size() const -> std::size_t + { return headers.size(); } - constexpr auto as_vector() const -> const std::vector<received_header>& { + constexpr auto as_vector() const -> const std::vector<received_header> & + { return headers; } + private: - static auto received_header_chain_pool_dtor(void *ptr) -> void { + static auto received_header_chain_pool_dtor(void *ptr) -> void + { delete static_cast<received_header_chain *>(ptr); } std::vector<received_header> headers; }; -} // namespace rspamd::mime +}// namespace rspamd::mime -#endif //RSPAMD_RECEIVED_HXX +#endif//RSPAMD_RECEIVED_HXX diff --git a/src/libmime/scan_result.c b/src/libmime/scan_result.c index 4d8720edb..37bd2ddaa 100644 --- a/src/libmime/scan_result.c +++ b/src/libmime/scan_result.c @@ -25,10 +25,10 @@ #include <math.h> #include "contrib/uthash/utlist.h" -#define msg_debug_metric(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_metric_log_id, "metric", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_debug_metric(...) 
rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_metric_log_id, "metric", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(metric) @@ -36,40 +36,40 @@ INIT_LOG_MODULE(metric) static struct rspamd_counter_data symbols_count; static void -rspamd_scan_result_dtor (gpointer d) +rspamd_scan_result_dtor(gpointer d) { - struct rspamd_scan_result *r = (struct rspamd_scan_result *)d; + struct rspamd_scan_result *r = (struct rspamd_scan_result *) d; struct rspamd_symbol_result *sres; - rspamd_set_counter_ema (&symbols_count, kh_size (r->symbols), 0.5); + rspamd_set_counter_ema(&symbols_count, kh_size(r->symbols), 0.5); if (r->symbol_cbref != -1) { - luaL_unref (r->task->cfg->lua_state, LUA_REGISTRYINDEX, r->symbol_cbref); + luaL_unref(r->task->cfg->lua_state, LUA_REGISTRYINDEX, r->symbol_cbref); } - kh_foreach_value (r->symbols, sres, { + kh_foreach_value(r->symbols, sres, { if (sres->options) { - kh_destroy (rspamd_options_hash, sres->options); + kh_destroy(rspamd_options_hash, sres->options); } }); - kh_destroy (rspamd_symbols_hash, r->symbols); - kh_destroy (rspamd_symbols_group_hash, r->sym_groups); + kh_destroy(rspamd_symbols_hash, r->symbols); + kh_destroy(rspamd_symbols_group_hash, r->sym_groups); } struct rspamd_scan_result * -rspamd_create_metric_result (struct rspamd_task *task, - const gchar *name, gint lua_sym_cbref) +rspamd_create_metric_result(struct rspamd_task *task, + const gchar *name, gint lua_sym_cbref) { struct rspamd_scan_result *metric_res; guint i; - metric_res = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_scan_result)); - metric_res->symbols = kh_init (rspamd_symbols_hash); - metric_res->sym_groups = kh_init (rspamd_symbols_group_hash); + metric_res = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_scan_result)); + metric_res->symbols = kh_init(rspamd_symbols_hash); + metric_res->sym_groups = kh_init(rspamd_symbols_group_hash); if (name) { - metric_res->name = rspamd_mempool_strdup 
(task->task_pool, name); + metric_res->name = rspamd_mempool_strdup(task->task_pool, name); } else { metric_res->name = NULL; @@ -79,23 +79,24 @@ rspamd_create_metric_result (struct rspamd_task *task, metric_res->task = task; /* Optimize allocation */ - kh_resize (rspamd_symbols_group_hash, metric_res->sym_groups, 4); + kh_resize(rspamd_symbols_group_hash, metric_res->sym_groups, 4); if (symbols_count.mean > 4) { - kh_resize (rspamd_symbols_hash, metric_res->symbols, symbols_count.mean); + kh_resize(rspamd_symbols_hash, metric_res->symbols, symbols_count.mean); } else { - kh_resize (rspamd_symbols_hash, metric_res->symbols, 4); + kh_resize(rspamd_symbols_hash, metric_res->symbols, 4); } if (task->cfg) { struct rspamd_action *act, *tmp; - metric_res->actions_config = rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct rspamd_action_config) * HASH_COUNT (task->cfg->actions)); + metric_res->actions_config = rspamd_mempool_alloc0(task->task_pool, + sizeof(struct rspamd_action_config) * HASH_COUNT(task->cfg->actions)); i = 0; - HASH_ITER (hh, task->cfg->actions, act, tmp) { + HASH_ITER(hh, task->cfg->actions, act, tmp) + { metric_res->actions_config[i].flags = RSPAMD_ACTION_RESULT_DEFAULT; if (!(act->flags & RSPAMD_ACTION_NO_THRESHOLD)) { metric_res->actions_config[i].cur_limit = act->threshold; @@ -105,36 +106,35 @@ rspamd_create_metric_result (struct rspamd_task *task, } metric_res->actions_config[i].action = act; - i ++; + i++; } metric_res->nactions = i; } - rspamd_mempool_add_destructor (task->task_pool, - rspamd_scan_result_dtor, - metric_res); - DL_APPEND (task->result, metric_res); + rspamd_mempool_add_destructor(task->task_pool, + rspamd_scan_result_dtor, + metric_res); + DL_APPEND(task->result, metric_res); return metric_res; } static inline int -rspamd_pr_sort (const struct rspamd_passthrough_result *pra, - const struct rspamd_passthrough_result *prb) +rspamd_pr_sort(const struct rspamd_passthrough_result *pra, + const struct rspamd_passthrough_result 
*prb) { return prb->priority - pra->priority; } -bool -rspamd_add_passthrough_result (struct rspamd_task *task, - struct rspamd_action *action, - guint priority, - double target_score, - const gchar *message, - const gchar *module, - uint flags, - struct rspamd_scan_result *scan_result) +bool rspamd_add_passthrough_result(struct rspamd_task *task, + struct rspamd_action *action, + guint priority, + double target_score, + const gchar *message, + const gchar *module, + uint flags, + struct rspamd_scan_result *scan_result) { struct rspamd_passthrough_result *pr; @@ -145,7 +145,7 @@ rspamd_add_passthrough_result (struct rspamd_task *task, /* Find the specific action config */ struct rspamd_action_config *action_config = NULL; - for (unsigned int i = 0; i < scan_result->nactions; i ++) { + for (unsigned int i = 0; i < scan_result->nactions; i++) { struct rspamd_action_config *cur = &scan_result->actions_config[i]; /* We assume that all action pointers are static */ @@ -156,16 +156,16 @@ rspamd_add_passthrough_result (struct rspamd_task *task, } if (action_config && (action_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) { - msg_info_task ("<%s>: NOT set pre-result to '%s' %s(%.2f): '%s' from %s(%d); action is disabled", - MESSAGE_FIELD_CHECK (task, message_id), action->name, - flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", - target_score, - message, module, priority); + msg_info_task("<%s>: NOT set pre-result to '%s' %s(%.2f): '%s' from %s(%d); action is disabled", + MESSAGE_FIELD_CHECK(task, message_id), action->name, + flags & RSPAMD_PASSTHROUGH_LEAST ? 
"*least " : "", + target_score, + message, module, priority); return false; } - pr = rspamd_mempool_alloc (task->task_pool, sizeof (*pr)); + pr = rspamd_mempool_alloc(task->task_pool, sizeof(*pr)); pr->action = action; pr->priority = priority; pr->message = message; @@ -173,41 +173,42 @@ rspamd_add_passthrough_result (struct rspamd_task *task, pr->target_score = target_score; pr->flags = flags; - DL_APPEND (scan_result->passthrough_result, pr); - DL_SORT (scan_result->passthrough_result, rspamd_pr_sort); + DL_APPEND(scan_result->passthrough_result, pr); + DL_SORT(scan_result->passthrough_result, rspamd_pr_sort); - if (!isnan (target_score)) { + if (!isnan(target_score)) { - msg_info_task ("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)", - MESSAGE_FIELD_CHECK (task, message_id), action->name, - flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", - target_score, - message, module, priority); + msg_info_task("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)", + MESSAGE_FIELD_CHECK(task, message_id), action->name, + flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", + target_score, + message, module, priority); } else { - msg_info_task ("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)", - MESSAGE_FIELD_CHECK (task, message_id), action->name, - flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "", - message, module, priority); + msg_info_task("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)", + MESSAGE_FIELD_CHECK(task, message_id), action->name, + flags & RSPAMD_PASSTHROUGH_LEAST ? 
"*least " : "", + message, module, priority); } - scan_result->nresults ++; + scan_result->nresults++; return true; } static inline gdouble -rspamd_check_group_score (struct rspamd_task *task, - const gchar *symbol, - struct rspamd_symbols_group *gr, - gdouble *group_score, - gdouble w) +rspamd_check_group_score(struct rspamd_task *task, + const gchar *symbol, + struct rspamd_symbols_group *gr, + gdouble *group_score, + gdouble w) { if (gr != NULL && group_score && gr->max_score > 0.0 && w > 0.0) { if (*group_score >= gr->max_score && w > 0) { - msg_info_task ("maximum group score %.2f for group %s has been reached," - " ignoring symbol %s with weight %.2f", gr->max_score, - gr->name, symbol, w); + msg_info_task("maximum group score %.2f for group %s has been reached," + " ignoring symbol %s with weight %.2f", + gr->max_score, + gr->name, symbol, w); return NAN; } else if (*group_score + w > gr->max_score) { @@ -223,13 +224,13 @@ rspamd_check_group_score (struct rspamd_task *task, #endif static struct rspamd_symbol_result * -insert_metric_result (struct rspamd_task *task, - const gchar *symbol, - double weight, - const gchar *opt, - struct rspamd_scan_result *metric_res, - enum rspamd_symbol_insert_flags flags, - bool *new_sym) +insert_metric_result(struct rspamd_task *task, + const gchar *symbol, + double weight, + const gchar *opt, + struct rspamd_scan_result *metric_res, + enum rspamd_symbol_insert_flags flags, + bool *new_sym) { struct rspamd_symbol_result *symbol_result = NULL; gdouble final_score, *gr_score = NULL, next_gf = 1.0, diff; @@ -242,16 +243,16 @@ insert_metric_result (struct rspamd_task *task, gboolean single = !!(flags & RSPAMD_SYMBOL_INSERT_SINGLE); gchar *sym_cpy; - if (!isfinite (weight)) { - msg_warn_task ("detected %s score for symbol %s, replace it with zero", - isnan (weight) ? "NaN" : "infinity", symbol); + if (!isfinite(weight)) { + msg_warn_task("detected %s score for symbol %s, replace it with zero", + isnan(weight) ? 
"NaN" : "infinity", symbol); weight = 0.0; } - msg_debug_metric ("want to insert symbol %s, initial weight %.2f", - symbol, weight); + msg_debug_metric("want to insert symbol %s, initial weight %.2f", + symbol, weight); - sdef = g_hash_table_lookup (task->cfg->symbols, symbol); + sdef = g_hash_table_lookup(task->cfg->symbols, symbol); if (sdef == NULL) { if (flags & RSPAMD_SYMBOL_INSERT_ENFORCE) { final_score = 1.0 * weight; /* Enforce static weight to 1.0 */ @@ -260,67 +261,69 @@ insert_metric_result (struct rspamd_task *task, final_score = 0.0; } - msg_debug_metric ("no symbol definition for %s; final multiplier %.2f", - symbol, final_score); + msg_debug_metric("no symbol definition for %s; final multiplier %.2f", + symbol, final_score); } else { if (sdef->cache_item) { /* Check if we can insert this symbol at all */ - if (!rspamd_symcache_is_item_allowed (task, sdef->cache_item, FALSE)) { - msg_debug_metric ("symbol %s is not allowed to be inserted due to settings", - symbol); + if (!rspamd_symcache_is_item_allowed(task, sdef->cache_item, FALSE)) { + msg_debug_metric("symbol %s is not allowed to be inserted due to settings", + symbol); return NULL; } } final_score = (*sdef->weight_ptr) * weight; - PTR_ARRAY_FOREACH (sdef->groups, i, gr) { - k = kh_get (rspamd_symbols_group_hash, metric_res->sym_groups, gr); + PTR_ARRAY_FOREACH(sdef->groups, i, gr) + { + k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr); - if (k == kh_end (metric_res->sym_groups)) { - k = kh_put (rspamd_symbols_group_hash, metric_res->sym_groups, - gr, &ret); - kh_value (metric_res->sym_groups, k) = 0; + if (k == kh_end(metric_res->sym_groups)) { + k = kh_put(rspamd_symbols_group_hash, metric_res->sym_groups, + gr, &ret); + kh_value(metric_res->sym_groups, k) = 0; } } - msg_debug_metric ("metric multiplier for %s is %.2f", - symbol, *sdef->weight_ptr); + msg_debug_metric("metric multiplier for %s is %.2f", + symbol, *sdef->weight_ptr); } if (task->settings) { gdouble corr; - mobj 
= ucl_object_lookup (task->settings, "scores"); + mobj = ucl_object_lookup(task->settings, "scores"); if (!mobj) { /* Legacy */ mobj = task->settings; } else { - msg_debug_metric ("found scores in the settings"); + msg_debug_metric("found scores in the settings"); } - sobj = ucl_object_lookup (mobj, symbol); - if (sobj != NULL && ucl_object_todouble_safe (sobj, &corr)) { - msg_debug_metric ("settings: changed weight of symbol %s from %.2f " - "to %.2f * %.2f", - symbol, final_score, corr, weight); + sobj = ucl_object_lookup(mobj, symbol); + if (sobj != NULL && ucl_object_todouble_safe(sobj, &corr)) { + msg_debug_metric("settings: changed weight of symbol %s from %.2f " + "to %.2f * %.2f", + symbol, final_score, corr, weight); final_score = corr * weight; } } - k = kh_get (rspamd_symbols_hash, metric_res->symbols, symbol); - if (k != kh_end (metric_res->symbols)) { + k = kh_get(rspamd_symbols_hash, metric_res->symbols, symbol); + if (k != kh_end(metric_res->symbols)) { /* Existing metric score */ - symbol_result = kh_value (metric_res->symbols, k); + symbol_result = kh_value(metric_res->symbols, k); if (single) { max_shots = 1; } else { if (sdef) { if (sdef->groups) { - PTR_ARRAY_FOREACH(sdef->groups, i, gr) { + PTR_ARRAY_FOREACH(sdef->groups, i, gr) + { if (gr->flags & RSPAMD_SYMBOL_GROUP_ONE_SHOT) { max_shots = 1; } @@ -334,32 +337,32 @@ insert_metric_result (struct rspamd_task *task, } } - msg_debug_metric ("nshots: %d for symbol %s", max_shots, symbol); + msg_debug_metric("nshots: %d for symbol %s", max_shots, symbol); if (!single && (max_shots > 0 && (symbol_result->nshots >= max_shots))) { single = TRUE; } - symbol_result->nshots ++; + symbol_result->nshots++; if (opt) { - rspamd_task_add_result_option (task, symbol_result, opt, strlen (opt)); + rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt)); } /* Adjust diff */ if (!single) { diff = final_score; - msg_debug_metric ("symbol %s can be inserted multiple times: %.2f weight", - symbol, diff); 
+ msg_debug_metric("symbol %s can be inserted multiple times: %.2f weight", + symbol, diff); } else { - if (fabs (symbol_result->score) < fabs (final_score) && - signbit (symbol_result->score) == signbit (final_score)) { + if (fabs(symbol_result->score) < fabs(final_score) && + signbit(symbol_result->score) == signbit(final_score)) { /* Replace less significant weight with a more significant one */ diff = final_score - symbol_result->score; - msg_debug_metric ("symbol %s can be inserted single time;" - " weight adjusted %.2f + %.2f", - symbol, symbol_result->score, diff); + msg_debug_metric("symbol %s can be inserted single time;" + " weight adjusted %.2f + %.2f", + symbol, symbol_result->score, diff); } else { diff = 0; @@ -376,26 +379,27 @@ insert_metric_result (struct rspamd_task *task, next_gf = task->cfg->grow_factor; } - msg_debug_metric ("adjust grow factor to %.2f for symbol %s (%.2f final)", - next_gf, symbol, diff); + msg_debug_metric("adjust grow factor to %.2f for symbol %s (%.2f final)", + next_gf, symbol, diff); if (sdef) { - PTR_ARRAY_FOREACH (sdef->groups, i, gr) { + PTR_ARRAY_FOREACH(sdef->groups, i, gr) + { gdouble cur_diff; - k = kh_get (rspamd_symbols_group_hash, - metric_res->sym_groups, gr); - g_assert (k != kh_end (metric_res->sym_groups)); - gr_score = &kh_value (metric_res->sym_groups, k); - cur_diff = rspamd_check_group_score (task, symbol, gr, - gr_score, diff); + k = kh_get(rspamd_symbols_group_hash, + metric_res->sym_groups, gr); + g_assert(k != kh_end(metric_res->sym_groups)); + gr_score = &kh_value(metric_res->sym_groups, k); + cur_diff = rspamd_check_group_score(task, symbol, gr, + gr_score, diff); - if (isnan (cur_diff)) { + if (isnan(cur_diff)) { /* Limit reached, do not add result */ - msg_debug_metric ( - "group limit %.2f is reached for %s when inserting symbol %s;" - " drop score %.2f", - *gr_score, gr->name, symbol, diff); + msg_debug_metric( + "group limit %.2f is reached for %s when inserting symbol %s;" + " drop score 
%.2f", + *gr_score, gr->name, symbol, diff); diff = NAN; break; @@ -405,27 +409,28 @@ insert_metric_result (struct rspamd_task *task, if (cur_diff < diff) { /* Reduce */ - msg_debug_metric ( - "group limit %.2f is reached for %s when inserting symbol %s;" - " reduce score %.2f - %.2f", - *gr_score, gr->name, symbol, diff, cur_diff); + msg_debug_metric( + "group limit %.2f is reached for %s when inserting symbol %s;" + " reduce score %.2f - %.2f", + *gr_score, gr->name, symbol, diff, cur_diff); diff = cur_diff; } } } } - if (!isnan (diff)) { + if (!isnan(diff)) { metric_res->score += diff; metric_res->grow_factor = next_gf; if (single) { - msg_debug_metric ("final score for single symbol %s = %.2f; %.2f diff", - symbol, final_score, diff); + msg_debug_metric("final score for single symbol %s = %.2f; %.2f diff", + symbol, final_score, diff); symbol_result->score = final_score; - } else { - msg_debug_metric ("increase final score for multiple symbol %s += %.2f = %.2f", - symbol, symbol_result->score, diff); + } + else { + msg_debug_metric("increase final score for multiple symbol %s += %.2f = %.2f", + symbol, symbol_result->score, diff); symbol_result->score += diff; } } @@ -437,12 +442,12 @@ insert_metric_result (struct rspamd_task *task, *new_sym = true; } - sym_cpy = rspamd_mempool_strdup (task->task_pool, symbol); - k = kh_put (rspamd_symbols_hash, metric_res->symbols, - sym_cpy, &ret); - g_assert (ret > 0); - symbol_result = rspamd_mempool_alloc0 (task->task_pool, sizeof (*symbol_result)); - kh_value (metric_res->symbols, k) = symbol_result; + sym_cpy = rspamd_mempool_strdup(task->task_pool, symbol); + k = kh_put(rspamd_symbols_hash, metric_res->symbols, + sym_cpy, &ret); + g_assert(ret > 0); + symbol_result = rspamd_mempool_alloc0(task->task_pool, sizeof(*symbol_result)); + kh_value(metric_res->symbols, k) = symbol_result; /* Handle grow factor */ if (metric_res->grow_factor && final_score > 0) { @@ -453,8 +458,8 @@ insert_metric_result (struct rspamd_task 
*task, next_gf = task->cfg->grow_factor; } - msg_debug_metric ("adjust grow factor to %.2f for symbol %s (%.2f final)", - next_gf, symbol, final_score); + msg_debug_metric("adjust grow factor to %.2f for symbol %s (%.2f final)", + next_gf, symbol, final_score); symbol_result->name = sym_cpy; symbol_result->sym = sdef; @@ -462,39 +467,41 @@ insert_metric_result (struct rspamd_task *task, if (sdef) { /* Check group limits */ - PTR_ARRAY_FOREACH (sdef->groups, i, gr) { + PTR_ARRAY_FOREACH(sdef->groups, i, gr) + { gdouble cur_score; - k = kh_get (rspamd_symbols_group_hash, metric_res->sym_groups, gr); - g_assert (k != kh_end (metric_res->sym_groups)); - gr_score = &kh_value (metric_res->sym_groups, k); - cur_score = rspamd_check_group_score (task, symbol, gr, - gr_score, final_score); + k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr); + g_assert(k != kh_end(metric_res->sym_groups)); + gr_score = &kh_value(metric_res->sym_groups, k); + cur_score = rspamd_check_group_score(task, symbol, gr, + gr_score, final_score); - if (isnan (cur_score)) { + if (isnan(cur_score)) { /* Limit reached, do not add result */ - msg_debug_metric ( - "group limit %.2f is reached for %s when inserting symbol %s;" - " drop score %.2f", - *gr_score, gr->name, symbol, final_score); + msg_debug_metric( + "group limit %.2f is reached for %s when inserting symbol %s;" + " drop score %.2f", + *gr_score, gr->name, symbol, final_score); final_score = NAN; break; - } else if (gr_score) { + } + else if (gr_score) { *gr_score += cur_score; if (cur_score < final_score) { /* Reduce */ - msg_debug_metric ( - "group limit %.2f is reached for %s when inserting symbol %s;" - " reduce score %.2f - %.2f", - *gr_score, gr->name, symbol, final_score, cur_score); + msg_debug_metric( + "group limit %.2f is reached for %s when inserting symbol %s;" + " reduce score %.2f - %.2f", + *gr_score, gr->name, symbol, final_score, cur_score); final_score = cur_score; } } } } - if (!isnan (final_score)) { + if 
(!isnan(final_score)) { const double epsilon = DBL_EPSILON; metric_res->score += final_score; @@ -502,12 +509,12 @@ insert_metric_result (struct rspamd_task *task, symbol_result->score = final_score; if (final_score > epsilon) { - metric_res->npositive ++; + metric_res->npositive++; metric_res->positive_score += final_score; } else if (final_score < -epsilon) { - metric_res->nnegative ++; - metric_res->negative_score += fabs (final_score); + metric_res->nnegative++; + metric_res->negative_score += fabs(final_score); } } else { @@ -515,77 +522,78 @@ insert_metric_result (struct rspamd_task *task, } if (opt) { - rspamd_task_add_result_option (task, symbol_result, opt, strlen (opt)); + rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt)); } } - msg_debug_metric ("final insertion for symbol %s, score %.2f, factor: %f", - symbol, - symbol_result->score, - final_score); - metric_res->nresults ++; + msg_debug_metric("final insertion for symbol %s, score %.2f, factor: %f", + symbol, + symbol_result->score, + final_score); + metric_res->nresults++; return symbol_result; } struct rspamd_symbol_result * -rspamd_task_insert_result_full (struct rspamd_task *task, - const gchar *symbol, - double weight, - const gchar *opt, - enum rspamd_symbol_insert_flags flags, - struct rspamd_scan_result *result) +rspamd_task_insert_result_full(struct rspamd_task *task, + const gchar *symbol, + double weight, + const gchar *opt, + enum rspamd_symbol_insert_flags flags, + struct rspamd_scan_result *result) { struct rspamd_symbol_result *symbol_result = NULL, *ret = NULL; struct rspamd_scan_result *mres; if (task->processed_stages & (RSPAMD_TASK_STAGE_IDEMPOTENT >> 1)) { - msg_err_task ("cannot insert symbol %s on idempotent phase", - symbol); + msg_err_task("cannot insert symbol %s on idempotent phase", + symbol); return NULL; } if (result == NULL) { /* Insert everywhere */ - DL_FOREACH (task->result, mres) { + DL_FOREACH(task->result, mres) + { if (mres->symbol_cbref != -1) { 
/* Check if we can insert this symbol to this symbol result */ GError *err = NULL; lua_State *L = (lua_State *) task->cfg->lua_state; - if (!rspamd_lua_universal_pcall (L, mres->symbol_cbref, - G_STRLOC, 1, "uss", &err, - "rspamd{task}", task, symbol, mres->name ? mres->name : "default")) { - msg_warn_task ("cannot call for symbol_cbref for result %s: %e", - mres->name ? mres->name : "default", err); - g_error_free (err); + if (!rspamd_lua_universal_pcall(L, mres->symbol_cbref, + G_STRLOC, 1, "uss", &err, + "rspamd{task}", task, symbol, mres->name ? mres->name : "default")) { + msg_warn_task("cannot call for symbol_cbref for result %s: %e", + mres->name ? mres->name : "default", err); + g_error_free(err); continue; } else { - if (!lua_toboolean (L, -1)) { + if (!lua_toboolean(L, -1)) { /* Skip symbol */ - msg_debug_metric ("skip symbol %s for result %s due to Lua return value", - symbol, mres->name); - lua_pop (L, 1); /* Remove result */ + msg_debug_metric("skip symbol %s for result %s due to Lua return value", + symbol, mres->name); + lua_pop(L, 1); /* Remove result */ continue; } - lua_pop (L, 1); /* Remove result */ + lua_pop(L, 1); /* Remove result */ } } bool new_symbol = false; - symbol_result = insert_metric_result (task, - symbol, - weight, - opt, - mres, - flags, - &new_symbol); + symbol_result = insert_metric_result(task, + symbol, + weight, + opt, + mres, + flags, + &new_symbol); if (mres->name == NULL) { /* Default result */ @@ -593,34 +601,34 @@ rspamd_task_insert_result_full (struct rspamd_task *task, /* Process cache item */ if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) { - rspamd_symcache_inc_frequency (task->cfg->cache, - symbol_result->sym->cache_item, - symbol_result->sym->name); + rspamd_symcache_inc_frequency(task->cfg->cache, + symbol_result->sym->cache_item, + symbol_result->sym->name); } } else if (new_symbol) { /* O(N) but we normally don't have any shadow results */ - LL_APPEND (ret, 
symbol_result); + LL_APPEND(ret, symbol_result); } } } else { /* Specific insertion */ - symbol_result = insert_metric_result (task, - symbol, - weight, - opt, - result, - flags, - NULL); + symbol_result = insert_metric_result(task, + symbol, + weight, + opt, + result, + flags, + NULL); ret = symbol_result; if (result->name == NULL) { /* Process cache item */ if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) { - rspamd_symcache_inc_frequency (task->cfg->cache, - symbol_result->sym->cache_item, - symbol_result->sym->name); + rspamd_symcache_inc_frequency(task->cfg->cache, + symbol_result->sym->cache_item, + symbol_result->sym->name); } } } @@ -629,10 +637,10 @@ rspamd_task_insert_result_full (struct rspamd_task *task, } static gchar * -rspamd_task_option_safe_copy (struct rspamd_task *task, - const gchar *val, - gsize vlen, - gsize *outlen) +rspamd_task_option_safe_copy(struct rspamd_task *task, + const gchar *val, + gsize vlen, + gsize *outlen) { const gchar *p, *end; @@ -645,37 +653,37 @@ rspamd_task_option_safe_copy (struct rspamd_task *task, UChar32 uc; gint off = 0; - U8_NEXT (p, off, end - p, uc); + U8_NEXT(p, off, end - p, uc); if (uc > 0) { - if (u_isprint (uc)) { + if (u_isprint(uc)) { vlen += off; } else { /* We will replace it with 0xFFFD */ - vlen += MAX (off, 3); + vlen += MAX(off, 3); } } else { - vlen += MAX (off, 3); + vlen += MAX(off, 3); } p += off; } - else if (!g_ascii_isprint (*p)) { + else if (!g_ascii_isprint(*p)) { /* Another 0xFFFD */ vlen += 3; - p ++; + p++; } else { - p ++; - vlen ++; + p++; + vlen++; } } gchar *dest, *d; - dest = rspamd_mempool_alloc (task->task_pool, vlen + 1); + dest = rspamd_mempool_alloc(task->task_pool, vlen + 1); d = dest; p = val; @@ -684,11 +692,11 @@ rspamd_task_option_safe_copy (struct rspamd_task *task, UChar32 uc; gint off = 0; - U8_NEXT (p, off, end - p, uc); + U8_NEXT(p, off, end - p, uc); if (uc > 0) { - if (u_isprint (uc)) { - memcpy (d, p, off); + if 
(u_isprint(uc)) { + memcpy(d, p, off); d += off; } else { @@ -706,12 +714,12 @@ rspamd_task_option_safe_copy (struct rspamd_task *task, p += off; } - else if (!g_ascii_isprint (*p)) { + else if (!g_ascii_isprint(*p)) { /* Another 0xFFFD */ *d++ = '\357'; *d++ = '\277'; *d++ = '\275'; - p ++; + p++; } else { *d++ = *p++; @@ -725,10 +733,10 @@ rspamd_task_option_safe_copy (struct rspamd_task *task, } gboolean -rspamd_task_add_result_option (struct rspamd_task *task, - struct rspamd_symbol_result *s, - const gchar *val, - gsize vlen) +rspamd_task_add_result_option(struct rspamd_task *task, + struct rspamd_symbol_result *s, + const gchar *val, + gsize vlen) { struct rspamd_symbol_option *opt, srch; gboolean ret = FALSE; @@ -744,23 +752,24 @@ rspamd_task_add_result_option (struct rspamd_task *task, * symbol from the default result, not some shadow result, or * the option insertion will be wrong */ - LL_FOREACH (s, cur) { + LL_FOREACH(s, cur) + { if (cur->opts_len < 0) { /* Cannot add more options, give up */ - msg_debug_task ("cannot add more options to symbol %s when adding option %s", - cur->name, val); + msg_debug_task("cannot add more options to symbol %s when adding option %s", + cur->name, val); ret = FALSE; continue; } if (!cur->options) { - cur->options = kh_init (rspamd_options_hash); + cur->options = kh_init(rspamd_options_hash); } if (vlen + cur->opts_len > task->cfg->max_opts_len) { /* Add truncated option */ - msg_info_task ("cannot add more options to symbol %s when adding option %s", - cur->name, val); + msg_info_task("cannot add more options to symbol %s when adding option %s", + cur->name, val); val = "..."; vlen = 3; cur->opts_len = -1; @@ -770,23 +779,23 @@ rspamd_task_add_result_option (struct rspamd_task *task, srch.option = (gchar *) val; srch.optlen = vlen; - k = kh_get (rspamd_options_hash, cur->options, &srch); + k = kh_get(rspamd_options_hash, cur->options, &srch); - if (k == kh_end (cur->options)) { - opt_cpy = rspamd_task_option_safe_copy 
(task, val, vlen, &cpy_len); + if (k == kh_end(cur->options)) { + opt_cpy = rspamd_task_option_safe_copy(task, val, vlen, &cpy_len); if (cpy_len != vlen) { srch.option = (gchar *) opt_cpy; srch.optlen = cpy_len; - k = kh_get (rspamd_options_hash, cur->options, &srch); + k = kh_get(rspamd_options_hash, cur->options, &srch); } /* Append new options */ - if (k == kh_end (cur->options)) { - opt = rspamd_mempool_alloc0 (task->task_pool, sizeof(*opt)); + if (k == kh_end(cur->options)) { + opt = rspamd_mempool_alloc0(task->task_pool, sizeof(*opt)); opt->optlen = cpy_len; opt->option = opt_cpy; - kh_put (rspamd_options_hash, cur->options, opt, &r); - DL_APPEND (cur->opts_head, opt); + kh_put(rspamd_options_hash, cur->options, opt, &r); + DL_APPEND(cur->opts_head, opt); if (s == cur) { ret = TRUE; @@ -810,16 +819,16 @@ rspamd_task_add_result_option (struct rspamd_task *task, ret = TRUE; } - task->result->nresults ++; + task->result->nresults++; return ret; } struct rspamd_action_config * -rspamd_find_action_config_for_action (struct rspamd_scan_result *scan_result, - struct rspamd_action *act) +rspamd_find_action_config_for_action(struct rspamd_scan_result *scan_result, + struct rspamd_action *act) { - for (unsigned int i = 0; i < scan_result->nactions; i ++) { + for (unsigned int i = 0; i < scan_result->nactions; i++) { struct rspamd_action_config *cur = &scan_result->actions_config[i]; if (act == cur->action) { @@ -831,12 +840,12 @@ rspamd_find_action_config_for_action (struct rspamd_scan_result *scan_result, } struct rspamd_action * -rspamd_check_action_metric (struct rspamd_task *task, - struct rspamd_passthrough_result **ppr, - struct rspamd_scan_result *scan_result) +rspamd_check_action_metric(struct rspamd_task *task, + struct rspamd_passthrough_result **ppr, + struct rspamd_scan_result *scan_result) { struct rspamd_action_config *action_lim, - *noaction = NULL; + *noaction = NULL; struct rspamd_action *selected_action = NULL, *least_action = NULL; struct 
rspamd_passthrough_result *pr, *sel_pr = NULL; double max_score = -(G_MAXDOUBLE), sc; @@ -846,10 +855,11 @@ rspamd_check_action_metric (struct rspamd_task *task, scan_result = task->result; } - if (scan_result->passthrough_result != NULL) { - DL_FOREACH (scan_result->passthrough_result, pr) { + if (scan_result->passthrough_result != NULL) { + DL_FOREACH(scan_result->passthrough_result, pr) + { struct rspamd_action_config *act_config = - rspamd_find_action_config_for_action (scan_result, pr->action); + rspamd_find_action_config_for_action(scan_result, pr->action); /* Skip disabled actions */ if (act_config && (act_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) { @@ -861,9 +871,9 @@ rspamd_check_action_metric (struct rspamd_task *task, selected_action = pr->action; if (!(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) { - if (!isnan (sc)) { + if (!isnan(sc)) { if (pr->action->action_type == METRIC_ACTION_NOACTION) { - scan_result->score = MIN (sc, scan_result->score); + scan_result->score = MIN(sc, scan_result->score); } else { scan_result->score = sc; @@ -880,7 +890,7 @@ rspamd_check_action_metric (struct rspamd_task *task, seen_least = true; least_action = selected_action; - if (isnan (sc)) { + if (isnan(sc)) { if (selected_action->flags & RSPAMD_ACTION_NO_THRESHOLD) { /* @@ -914,7 +924,7 @@ rspamd_check_action_metric (struct rspamd_task *task, /* * Select result by score */ - for (size_t i = scan_result->nactions - 1; i != (size_t)-1; i--) { + for (size_t i = scan_result->nactions - 1; i != (size_t) -1; i--) { action_lim = &scan_result->actions_config[i]; sc = action_lim->cur_limit; @@ -922,12 +932,12 @@ rspamd_check_action_metric (struct rspamd_task *task, noaction = action_lim; } - if ((action_lim->flags & (RSPAMD_ACTION_RESULT_DISABLED|RSPAMD_ACTION_RESULT_NO_THRESHOLD))) { + if ((action_lim->flags & (RSPAMD_ACTION_RESULT_DISABLED | RSPAMD_ACTION_RESULT_NO_THRESHOLD))) { continue; } - if (isnan (sc) || - (action_lim->action->flags & 
(RSPAMD_ACTION_NO_THRESHOLD|RSPAMD_ACTION_HAM))) { + if (isnan(sc) || + (action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) { continue; } @@ -947,7 +957,7 @@ rspamd_check_action_metric (struct rspamd_task *task, /* Adjust least action */ if (least_action->flags & RSPAMD_ACTION_NO_THRESHOLD) { if (selected_action->action_type != METRIC_ACTION_REJECT && - selected_action->action_type != METRIC_ACTION_DISCARD) { + selected_action->action_type != METRIC_ACTION_DISCARD) { /* Override score based action with least action */ selected_action = least_action; @@ -979,8 +989,8 @@ rspamd_check_action_metric (struct rspamd_task *task, } struct rspamd_symbol_result * -rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym, - struct rspamd_scan_result *result) +rspamd_task_find_symbol_result(struct rspamd_task *task, const char *sym, + struct rspamd_scan_result *result) { struct rspamd_symbol_result *res = NULL; khiter_t k; @@ -992,17 +1002,17 @@ rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym, k = kh_get(rspamd_symbols_hash, result->symbols, sym); - if (k != kh_end (result->symbols)) { - res = kh_value (result->symbols, k); + if (k != kh_end(result->symbols)) { + res = kh_value(result->symbols, k); } return res; } -struct rspamd_symbol_result* rspamd_task_remove_symbol_result ( - struct rspamd_task *task, - const gchar *symbol, - struct rspamd_scan_result *result) +struct rspamd_symbol_result *rspamd_task_remove_symbol_result( + struct rspamd_task *task, + const gchar *symbol, + struct rspamd_scan_result *result) { struct rspamd_symbol_result *res = NULL; khiter_t k; @@ -1012,12 +1022,12 @@ struct rspamd_symbol_result* rspamd_task_remove_symbol_result ( result = task->result; } - k = kh_get (rspamd_symbols_hash, result->symbols, symbol); + k = kh_get(rspamd_symbols_hash, result->symbols, symbol); - if (k != kh_end (result->symbols)) { - res = kh_value (result->symbols, k); + if (k != 
kh_end(result->symbols)) { + res = kh_value(result->symbols, k); - if (!isnan (res->score)) { + if (!isnan(res->score)) { /* Remove score from the result */ result->score -= res->score; @@ -1027,14 +1037,15 @@ struct rspamd_symbol_result* rspamd_task_remove_symbol_result ( gint i; khiter_t k_groups; - PTR_ARRAY_FOREACH (res->sym->groups, i, gr) { + PTR_ARRAY_FOREACH(res->sym->groups, i, gr) + { gdouble *gr_score; - k_groups = kh_get (rspamd_symbols_group_hash, - result->sym_groups, gr); + k_groups = kh_get(rspamd_symbols_group_hash, + result->sym_groups, gr); - if (k_groups != kh_end (result->sym_groups)) { - gr_score = &kh_value (result->sym_groups, k_groups); + if (k_groups != kh_end(result->sym_groups)) { + gr_score = &kh_value(result->sym_groups, k_groups); if (gr_score) { *gr_score -= res->score; @@ -1044,7 +1055,7 @@ struct rspamd_symbol_result* rspamd_task_remove_symbol_result ( } } - kh_del (rspamd_symbols_hash, result->symbols, k); + kh_del(rspamd_symbols_hash, result->symbols, k); } else { return NULL; @@ -1053,10 +1064,9 @@ struct rspamd_symbol_result* rspamd_task_remove_symbol_result ( return res; } -void -rspamd_task_symbol_result_foreach (struct rspamd_task *task, - struct rspamd_scan_result *result, GHFunc func, - gpointer ud) +void rspamd_task_symbol_result_foreach(struct rspamd_task *task, + struct rspamd_scan_result *result, GHFunc func, + gpointer ud) { const gchar *kk; struct rspamd_symbol_result *res; @@ -1067,27 +1077,28 @@ rspamd_task_symbol_result_foreach (struct rspamd_task *task, } if (func) { - kh_foreach (result->symbols, kk, res, { - func ((gpointer)kk, (gpointer)res, ud); + kh_foreach(result->symbols, kk, res, { + func((gpointer) kk, (gpointer) res, ud); }); } } struct rspamd_scan_result * -rspamd_find_metric_result (struct rspamd_task *task, - const gchar *name) +rspamd_find_metric_result(struct rspamd_task *task, + const gchar *name) { struct rspamd_scan_result *res; if (name == NULL) { return task->result; } - else if (strcmp (name, 
"default") == 0) { + else if (strcmp(name, "default") == 0) { return task->result; } - DL_FOREACH (task->result, res) { - if (res->name && strcmp (res->name, name) == 0) { + DL_FOREACH(task->result, res) + { + if (res->name && strcmp(res->name, name) == 0) { return res; } } diff --git a/src/libmime/scan_result.h b/src/libmime/scan_result.h index ec0aa9f4d..da3fb1608 100644 --- a/src/libmime/scan_result.h +++ b/src/libmime/scan_result.h @@ -10,7 +10,7 @@ #include "rspamd_symcache.h" #include "task.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -35,12 +35,12 @@ struct kh_rspamd_options_hash_s; * Rspamd symbol */ struct rspamd_symbol_result { - double score; /**< symbol's score */ - struct kh_rspamd_options_hash_s *options; /**< list of symbol's options */ - struct rspamd_symbol_option *opts_head; /**< head of linked list of options */ + double score; /**< symbol's score */ + struct kh_rspamd_options_hash_s *options; /**< list of symbol's options */ + struct rspamd_symbol_option *opts_head; /**< head of linked list of options */ const gchar *name; - struct rspamd_symbol *sym; /**< symbol configuration */ - gssize opts_len; /**< total size of all options (negative if truncated option is added) */ + struct rspamd_symbol *sym; /**< symbol configuration */ + gssize opts_len; /**< total size of all options (negative if truncated option is added) */ guint nshots; int flags; struct rspamd_symbol_result *next; @@ -83,23 +83,23 @@ struct kh_rspamd_symbols_group_hash_s; struct rspamd_scan_result { - double score; /**< total score */ - double grow_factor; /**< current grow factor */ + double score; /**< total score */ + double grow_factor; /**< current grow factor */ struct rspamd_passthrough_result *passthrough_result; double positive_score; double negative_score; - struct kh_rspamd_symbols_hash_s *symbols; /**< symbols of metric */ + struct kh_rspamd_symbols_hash_s *symbols; /**< symbols of metric */ struct kh_rspamd_symbols_group_hash_s *sym_groups; /**< 
groups of symbols */ struct rspamd_action_config *actions_config; - const gchar *name; /**< for named results, NULL is the default result */ - struct rspamd_task *task; /**< back reference */ - gint symbol_cbref; /**< lua function that defines if a symbol can be inserted, -1 if unused */ + const gchar *name; /**< for named results, NULL is the default result */ + struct rspamd_task *task; /**< back reference */ + gint symbol_cbref; /**< lua function that defines if a symbol can be inserted, -1 if unused */ guint nactions; guint npositive; guint nnegative; - guint nresults; /**< all results: positive, negative, passthrough etc */ - guint nresults_postfilters; /**< how many results are there before postfilters stage */ - struct rspamd_scan_result *prev, *next; /**< double linked list of results */ + guint nresults; /**< all results: positive, negative, passthrough etc */ + guint nresults_postfilters; /**< how many results are there before postfilters stage */ + struct rspamd_scan_result *prev, *next; /**< double linked list of results */ }; /** @@ -107,8 +107,8 @@ struct rspamd_scan_result { * @param task task object * @return metric result or NULL if metric `name` has not been found */ -struct rspamd_scan_result *rspamd_create_metric_result (struct rspamd_task *task, - const gchar *name, gint lua_sym_cbref); +struct rspamd_scan_result *rspamd_create_metric_result(struct rspamd_task *task, + const gchar *name, gint lua_sym_cbref); /** * Find result with a specific name (NULL means the default result) @@ -116,8 +116,8 @@ struct rspamd_scan_result *rspamd_create_metric_result (struct rspamd_task *task * @param name * @return */ -struct rspamd_scan_result *rspamd_find_metric_result (struct rspamd_task *task, - const gchar *name); +struct rspamd_scan_result *rspamd_find_metric_result(struct rspamd_task *task, + const gchar *name); /** * Adds a new passthrough result to a task @@ -128,11 +128,11 @@ struct rspamd_scan_result *rspamd_find_metric_result (struct rspamd_task 
*task, * @param message * @param module */ -bool rspamd_add_passthrough_result (struct rspamd_task *task, - struct rspamd_action *action, guint priority, - double target_score, const gchar *message, - const gchar *module, guint flags, - struct rspamd_scan_result *scan_result); +bool rspamd_add_passthrough_result(struct rspamd_task *task, + struct rspamd_action *action, guint priority, + double target_score, const gchar *message, + const gchar *module, guint flags, + struct rspamd_scan_result *scan_result); enum rspamd_symbol_insert_flags { RSPAMD_SYMBOL_INSERT_DEFAULT = 0, @@ -148,17 +148,17 @@ enum rspamd_symbol_insert_flags { * @param weight numeric weight for symbol * @param opts list of symbol's options */ -struct rspamd_symbol_result *rspamd_task_insert_result_full (struct rspamd_task *task, - const gchar *symbol, - double weight, - const gchar *opts, - enum rspamd_symbol_insert_flags flags, - struct rspamd_scan_result *result); +struct rspamd_symbol_result *rspamd_task_insert_result_full(struct rspamd_task *task, + const gchar *symbol, + double weight, + const gchar *opts, + enum rspamd_symbol_insert_flags flags, + struct rspamd_scan_result *result); #define rspamd_task_insert_result_single(task, symbol, weight, opts) \ - rspamd_task_insert_result_full ((task), (symbol), (weight), (opts), RSPAMD_SYMBOL_INSERT_SINGLE, NULL) + rspamd_task_insert_result_full((task), (symbol), (weight), (opts), RSPAMD_SYMBOL_INSERT_SINGLE, NULL) #define rspamd_task_insert_result(task, symbol, weight, opts) \ - rspamd_task_insert_result_full ((task), (symbol), (weight), (opts), RSPAMD_SYMBOL_INSERT_DEFAULT, NULL) + rspamd_task_insert_result_full((task), (symbol), (weight), (opts), RSPAMD_SYMBOL_INSERT_DEFAULT, NULL) /** * Removes a symbol from a specific symbol result @@ -167,20 +167,20 @@ struct rspamd_symbol_result *rspamd_task_insert_result_full (struct rspamd_task * @param result * @return */ -struct rspamd_symbol_result* rspamd_task_remove_symbol_result ( - struct rspamd_task 
*task, - const gchar *symbol, - struct rspamd_scan_result *result); +struct rspamd_symbol_result *rspamd_task_remove_symbol_result( + struct rspamd_task *task, + const gchar *symbol, + struct rspamd_scan_result *result); /** * Adds new option to symbol * @param task * @param s * @param opt */ -gboolean rspamd_task_add_result_option (struct rspamd_task *task, - struct rspamd_symbol_result *s, - const gchar *opt, - gsize vlen); +gboolean rspamd_task_add_result_option(struct rspamd_task *task, + struct rspamd_symbol_result *s, + const gchar *opt, + gsize vlen); /** * Finds symbol result @@ -189,8 +189,8 @@ gboolean rspamd_task_add_result_option (struct rspamd_task *task, * @return */ struct rspamd_symbol_result * -rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym, - struct rspamd_scan_result *result); +rspamd_task_find_symbol_result(struct rspamd_task *task, const char *sym, + struct rspamd_scan_result *result); /** * Compatibility function to iterate on symbols hash @@ -198,10 +198,10 @@ rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym, * @param func * @param ud */ -void rspamd_task_symbol_result_foreach (struct rspamd_task *task, - struct rspamd_scan_result *result, - GHFunc func, - gpointer ud); +void rspamd_task_symbol_result_foreach(struct rspamd_task *task, + struct rspamd_scan_result *result, + GHFunc func, + gpointer ud); /** * Default consolidation function for metric, it get all symbols and multiply symbol @@ -210,9 +210,9 @@ void rspamd_task_symbol_result_foreach (struct rspamd_task *task, * @param metric_name name of metric * @return result metric weight */ -double rspamd_factor_consolidation_func (struct rspamd_task *task, - const gchar *metric_name, - const gchar *unused); +double rspamd_factor_consolidation_func(struct rspamd_task *task, + const gchar *metric_name, + const gchar *unused); /** @@ -220,14 +220,14 @@ double rspamd_factor_consolidation_func (struct rspamd_task *task, * @param task * @return 
*/ -struct rspamd_action *rspamd_check_action_metric (struct rspamd_task *task, - struct rspamd_passthrough_result **ppr, - struct rspamd_scan_result *scan_result); +struct rspamd_action *rspamd_check_action_metric(struct rspamd_task *task, + struct rspamd_passthrough_result **ppr, + struct rspamd_scan_result *scan_result); -struct rspamd_action_config *rspamd_find_action_config_for_action (struct rspamd_scan_result *scan_result, - struct rspamd_action *act); +struct rspamd_action_config *rspamd_find_action_config_for_action(struct rspamd_scan_result *scan_result, + struct rspamd_action *act); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libmime/scan_result_private.h b/src/libmime/scan_result_private.h index 39e544146..cf0c0c544 100644 --- a/src/libmime/scan_result_private.h +++ b/src/libmime/scan_result_private.h @@ -8,48 +8,48 @@ #include "scan_result.h" #include "contrib/libucl/khash.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif #define RSPAMD_OPTS_SEED 0x9f1f608628a4fefbULL -#define rspamd_symopt_hash(opt) (rspamd_cryptobox_fast_hash ( \ - ((struct rspamd_symbol_option *)opt)->option, \ - ((struct rspamd_symbol_option *)opt)->optlen, RSPAMD_OPTS_SEED)) +#define rspamd_symopt_hash(opt) (rspamd_cryptobox_fast_hash( \ + ((struct rspamd_symbol_option *) opt)->option, \ + ((struct rspamd_symbol_option *) opt)->optlen, RSPAMD_OPTS_SEED)) static inline bool -rspamd_symopt_equal (const struct rspamd_symbol_option *o1, - const struct rspamd_symbol_option *o2) +rspamd_symopt_equal(const struct rspamd_symbol_option *o1, + const struct rspamd_symbol_option *o2) { if (o1->optlen == o2->optlen) { - return (memcmp (o1->option, o2->option, o1->optlen) == 0); + return (memcmp(o1->option, o2->option, o1->optlen) == 0); } return false; } -KHASH_INIT (rspamd_options_hash, struct rspamd_symbol_option *, char, - 0, rspamd_symopt_hash, rspamd_symopt_equal); +KHASH_INIT(rspamd_options_hash, struct rspamd_symbol_option *, char, + 0, 
rspamd_symopt_hash, rspamd_symopt_equal); /** * Result of metric processing */ -KHASH_MAP_INIT_STR (rspamd_symbols_hash, struct rspamd_symbol_result *); +KHASH_MAP_INIT_STR(rspamd_symbols_hash, struct rspamd_symbol_result *); #if UINTPTR_MAX <= UINT_MAX /* 32 bit */ -#define rspamd_ptr_hash_func(key) (khint32_t)(((uintptr_t)(key))>>1) +#define rspamd_ptr_hash_func(key) (khint32_t)(((uintptr_t) (key)) >> 1) #else /* likely 64 bit */ -#define rspamd_ptr_hash_func(key) (khint32_t)(((uintptr_t)(key))>>3) +#define rspamd_ptr_hash_func(key) (khint32_t)(((uintptr_t) (key)) >> 3) #endif #define rspamd_ptr_equal_func(a, b) ((a) == (b)) -KHASH_INIT (rspamd_symbols_group_hash, - void *, - double, - 1, - rspamd_ptr_hash_func, - rspamd_ptr_equal_func); - -#ifdef __cplusplus +KHASH_INIT(rspamd_symbols_group_hash, + void *, + double, + 1, + rspamd_ptr_hash_func, + rspamd_ptr_equal_func); + +#ifdef __cplusplus } #endif -#endif //RSPAMD_SCAN_RESULT_PRIVATE_H +#endif//RSPAMD_SCAN_RESULT_PRIVATE_H diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h index 0d2c4044d..e188b6389 100644 --- a/src/libmime/smtp_parsers.h +++ b/src/libmime/smtp_parsers.h @@ -23,28 +23,28 @@ #include "message.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif -int rspamd_smtp_addr_parse (const char *data, size_t len, - struct rspamd_email_address *addr); +int rspamd_smtp_addr_parse(const char *data, size_t len, + struct rspamd_email_address *addr); -gboolean rspamd_content_disposition_parser (const char *data, size_t len, - struct rspamd_content_disposition *cd, - rspamd_mempool_t *pool); +gboolean rspamd_content_disposition_parser(const char *data, size_t len, + struct rspamd_content_disposition *cd, + rspamd_mempool_t *pool); gboolean -rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding, - const gchar **charset, gsize *charset_len, - const gchar **encoded, gsize *encoded_len); +rspamd_rfc2047_parser(const gchar *in, gsize len, gint *pencoding, + const gchar 
**charset, gsize *charset_len, + const gchar **encoded, gsize *encoded_len); -rspamd_inet_addr_t *rspamd_parse_smtp_ip (const char *data, size_t len, - rspamd_mempool_t *pool); +rspamd_inet_addr_t *rspamd_parse_smtp_ip(const char *data, size_t len, + rspamd_mempool_t *pool); -guint64 rspamd_parse_smtp_date (const unsigned char *data, size_t len, GError **err); +guint64 rspamd_parse_smtp_date(const unsigned char *data, size_t len, GError **err); -#ifdef __cplusplus +#ifdef __cplusplus } #endif |