about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-09-09 16:26:16 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-09-09 16:26:16 +0100
commit 56e236efa012c4be6b3893314ce4d3a570e16327 (patch)
tree d578ac358f7ae3cb4e303af195def103e55a311c
parent 4283a774fdfd5582ab516387a7a77969c40bb56f (diff)
download rspamd-56e236efa012c4be6b3893314ce4d3a570e16327.tar.gz
rspamd-56e236efa012c4be6b3893314ce4d3a570e16327.zip
[Rework] No more magic
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- src/libmime/message.c 225
-rw-r--r-- src/libmime/message.h 2
-rw-r--r-- src/libserver/cfg_file.h 1
-rw-r--r-- src/libserver/cfg_rcl.c 6
-rw-r--r-- src/libutil/util.c 37
-rw-r--r-- src/rspamd.h 3
7 files changed, 116 insertions, 160 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22c4b817b..952214391 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -638,8 +638,6 @@ ProcessPackage(LIBCRYPT LIBRARY crypto INCLUDE openssl/evp.h
ROOT ${OPENSSL_ROOT_DIR} MODULES openssl libcrypt)
ProcessPackage(LIBSSL LIBRARY ssl INCLUDE openssl/ssl.h
ROOT ${OPENSSL_ROOT_DIR} MODULES openssl libssl)
-ProcessPackage(MAGIC LIBRARY magic INCLUDE magic.h INCLUDE_SUFFIXES include/libmagic
- ROOT ${LIBMAGIC_ROOT_DIR} MODULES magic)
ProcessPackage(LIBZ LIBRARY z INCLUDE zlib.h INCLUDE_SUFFIXES include/zlib
ROOT ${LIBZ_ROOT_DIR} MODULES z)
ProcessPackage(SODIUM LIBRARY sodium INCLUDE sodium.h
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 92fa1f51b..00067ee83 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -818,98 +818,19 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
if (IS_CT_TEXT (mime_part->ct) && (!mime_part->detected_ct ||
IS_CT_TEXT (mime_part->detected_ct))) {
+ found_txt = TRUE;
+
html_tok.begin = "html";
html_tok.len = 4;
xhtml_tok.begin = "xhtml";
xhtml_tok.len = 5;
if (rspamd_ftok_casecmp (&mime_part->ct->subtype, &html_tok) == 0 ||
- rspamd_ftok_casecmp (&mime_part->ct->subtype, &xhtml_tok) == 0) {
+ rspamd_ftok_casecmp (&mime_part->ct->subtype, &xhtml_tok) == 0 ||
+ (mime_part->detected_ct &&
+ rspamd_ftok_casecmp (&mime_part->detected_ct->subtype, &html_tok) == 0)) {
found_html = TRUE;
}
- else {
- /*
- * We also need to apply heuristic for text parts that are actually
- * HTML.
- */
- RSPAMD_FTOK_ASSIGN (&html_tok, "<!DOCTYPE html");
- RSPAMD_FTOK_ASSIGN (&xhtml_tok, "<html");
-
- if (mime_part->parsed_data.len >= xhtml_tok.len &&
- rspamd_lc_cmp (mime_part->parsed_data.begin,
- xhtml_tok.begin, xhtml_tok.len) == 0) {
- found_html = TRUE;
- }
- else if (mime_part->parsed_data.len >= html_tok.len &&
- rspamd_lc_cmp (mime_part->parsed_data.begin,
- html_tok.begin, html_tok.len) == 0) {
- found_html = TRUE;
- }
- else {
- /* We need to be extra careful with some stupid things here */
-
- html_tok.begin = "plain";
- html_tok.len = 5;
-
- if (rspamd_ftok_casecmp (&mime_part->ct->subtype, &html_tok) == 0) {
- found_txt = TRUE;
- }
- else {
- if (mime_part->cd && mime_part->cd->filename.len > 4) {
- const gchar *pos = mime_part->cd->filename.begin +
- mime_part->cd->filename.len -
- sizeof (".txt") + 1;
- if (rspamd_lc_cmp (pos, ".txt", sizeof ("txt") - 1) == 0) {
- found_txt = TRUE;
- }
- else {
- msg_debug_task ("found mime part with incorrect content-type: %T/%T, "
- "filename: %T",
- &mime_part->ct->type,
- &mime_part->ct->subtype,
- &mime_part->cd->filename);
- }
- }
- else {
- /* For something like Content-Type: text */
- found_txt = TRUE;
- }
- }
- }
-
- if (found_html) {
- msg_info_task ("found html part pretending to be text/plain part");
- }
- }
- }
- else {
- /* Apply heuristic */
-
- if (mime_part->cd && mime_part->cd->filename.len > 4) {
- const gchar *pos = mime_part->cd->filename.begin +
- mime_part->cd->filename.len - sizeof (".htm") + 1;
-
- if (rspamd_lc_cmp (pos, ".htm", sizeof (".htm") - 1) == 0) {
- found_html = TRUE;
- }
- else if (rspamd_lc_cmp (pos, ".txt", sizeof ("txt") - 1) == 0) {
- found_txt = TRUE;
- }
- else if ( mime_part->cd->filename.len > 5) {
- pos = mime_part->cd->filename.begin +
- mime_part->cd->filename.len - sizeof (".html") + 1;
- if (rspamd_lc_cmp (pos, ".html", sizeof (".html") - 1) == 0) {
- found_html = TRUE;
- }
- }
- }
-
- if (found_txt || found_html) {
- msg_info_task ("found %s part with incorrect content-type: %T/%T",
- found_html ? "html" : "text",
- &mime_part->ct->type, &mime_part->ct->subtype);
- mime_part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
- }
}
/* Skip attachments */
@@ -1006,7 +927,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
{
struct rspamd_content_type *ct = NULL;
struct rspamd_mime_part *part;
- const char *mb = NULL;
+ const char *mb = "application/octet-stream";
gchar *mid;
rspamd_ftok_t srch, *tok;
gchar cdbuf[1024];
@@ -1015,6 +936,14 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
+ part->raw_data.begin = start;
+ part->raw_data.len = len;
+ part->parsed_data.begin = start;
+ part->parsed_data.len = len;
+ part->id = MESSAGE_FIELD (task, parts)->len;
+ part->raw_headers = rspamd_message_headers_new ();
+ part->headers_order = NULL;
+
tok = rspamd_task_get_request_header (task, "Content-Type");
if (tok) {
@@ -1023,11 +952,42 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
task->task_pool);
part->ct = ct;
}
+ else if (task->cfg && task->cfg->libs_ctx) {
+ lua_State *L = task->cfg->lua_state;
+
+ if (rspamd_lua_require_function (L,
+ "lua_magic", "detect_mime_part")) {
+
+ struct rspamd_mime_part **pmime;
+ struct rspamd_task **ptask;
- if (task->cfg && task->cfg->libs_ctx) {
- mb = magic_buffer (task->cfg->libs_ctx->libmagic,
- start,
- len);
+ pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
+ rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
+ *pmime = part;
+ ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
+ rspamd_lua_setclass (L, "rspamd{task}", -1);
+ *ptask = task;
+
+ if (lua_pcall (L, 2, 2, 0) != 0) {
+ msg_err_task ("cannot detect type: %s", lua_tostring (L, -1));
+ }
+ else {
+ if (lua_istable (L, -1)) {
+ lua_pushstring (L, "ct");
+ lua_gettable (L, -2);
+
+ if (lua_isstring (L, -1)) {
+ mb = rspamd_mempool_strdup (task->task_pool,
+ lua_tostring (L, -1));
+ }
+ }
+ }
+
+ lua_settop (L, 0);
+ }
+ else {
+ msg_err_task ("cannot require lua_magic.detect_mime_part");
+ }
if (mb) {
srch.begin = mb;
@@ -1059,13 +1019,6 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
}
}
- part->raw_data.begin = start;
- part->raw_data.len = len;
- part->parsed_data.begin = start;
- part->parsed_data.len = len;
- part->id = MESSAGE_FIELD (task, parts)->len;
- part->raw_headers = rspamd_message_headers_new ();
- part->headers_order = NULL;
tok = rspamd_task_get_request_header (task, "Filename");
@@ -1408,31 +1361,81 @@ rspamd_message_process (struct rspamd_task *task)
gdouble diff, *pdiff;
guint tw, *ptw, dw;
struct rspamd_mime_part *part;
+ lua_State *L = task->cfg->lua_state;
+ gint func_pos = -1;
rspamd_images_process (task);
rspamd_archives_process (task);
+ if (rspamd_lua_require_function (L,
+ "lua_magic", "detect_mime_part")) {
+ func_pos = lua_gettop (L);
+ }
+ else {
+ msg_err_task ("cannot require lua_magic.detect_mime_part");
+ }
+
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (!rspamd_message_process_text_part_maybe (task, part) &&
- part->parsed_data.len > 0) {
- if (task->cfg) {
- const gchar *mb = magic_buffer (task->cfg->libs_ctx->libmagic,
- part->parsed_data.begin,
- part->parsed_data.len);
-
- if (mb) {
- rspamd_ftok_t srch;
-
- srch.begin = mb;
- srch.len = strlen (mb);
- part->detected_ct = rspamd_content_type_parse (srch.begin,
- srch.len,
- task->task_pool);
+ if (func_pos != -1) {
+ struct rspamd_mime_part **pmime;
+ struct rspamd_task **ptask;
+
+ lua_pushvalue (L, func_pos);
+ pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
+ rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
+ *pmime = part;
+ ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
+ rspamd_lua_setclass (L, "rspamd{task}", -1);
+ *ptask = task;
+
+ if (lua_pcall (L, 2, 2, 0) != 0) {
+ msg_err_task ("cannot detect type: %s", lua_tostring (L, -1));
+ }
+ else {
+ if (lua_istable (L, -1)) {
+ const gchar *mb;
+
+ /* First returned value */
+ part->detected_ext = rspamd_mempool_strdup (task->task_pool,
+ lua_tostring (L, -2));
+
+ lua_pushstring (L, "ct");
+ lua_gettable (L, -2);
+
+ if (lua_isstring (L, -1)) {
+ mb = lua_tostring (L, -1);
+
+ if (mb) {
+ rspamd_ftok_t srch;
+
+ srch.begin = mb;
+ srch.len = strlen (mb);
+ part->detected_ct = rspamd_content_type_parse (srch.begin,
+ srch.len,
+ task->task_pool);
+ }
+ }
+
+ lua_pop (L, 1);
+
+ lua_pushstring (L, "type");
+ lua_gettable (L, -2);
+
+ if (lua_isstring (L, -1)) {
+ part->detected_type = rspamd_mempool_strdup (task->task_pool,
+ lua_tostring (L, -1));
+ }
}
}
+
+ lua_settop (L, func_pos);
}
+
+ rspamd_message_process_text_part_maybe (task, part);
}
+ lua_settop (L, 0);
+
/* Calculate average words length and number of short words */
struct rspamd_mime_text_part *text_part;
gdouble *var;
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 651e1d457..374d3a7f9 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -56,6 +56,8 @@ struct rspamd_mime_multipart {
struct rspamd_mime_part {
struct rspamd_content_type *ct;
struct rspamd_content_type *detected_ct;
+ gchar *detected_type;
+ gchar *detected_ext;
struct rspamd_content_disposition *cd;
rspamd_ftok_t raw_data;
rspamd_ftok_t parsed_data;
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 4faca7b56..263d00f38 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -437,7 +437,6 @@ struct rspamd_config {
gchar *history_file; /**< file to save rolling history */
gchar *tld_file; /**< file to load effective tld list from */
gchar *hs_cache_dir; /**< directory to save hyperscan databases */
- gchar *magic_file; /**< file to initialize libmagic */
gdouble dns_timeout; /**< timeout in milliseconds for waiting for dns reply */
guint32 dns_retransmits; /**< maximum retransmits count */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 2bdb6adc6..fb2cbf052 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2093,12 +2093,6 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
0,
"List of ssl ciphers (e.g. HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4)");
rspamd_rcl_add_default_handler (sub,
- "magic_file",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET (struct rspamd_config, magic_file),
- 0,
- "Path to a custom libmagic file");
- rspamd_rcl_add_default_handler (sub,
"max_message",
rspamd_rcl_parse_struct_integer,
G_STRUCT_OFFSET (struct rspamd_config, max_message),
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 86358e46e..7877582c2 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -2364,35 +2364,6 @@ rspamd_init_libs (void)
rlim.rlim_max = rlim.rlim_cur;
setrlimit (RLIMIT_STACK, &rlim);
- gint magic_flags = 0;
-
- /* Unless trusty and other crap is supported... */
-#if 0
-#ifdef MAGIC_NO_CHECK_BUILTIN
- magic_flags = MAGIC_NO_CHECK_BUILTIN;
-#endif
-#endif
- magic_flags |= MAGIC_MIME|MAGIC_NO_CHECK_COMPRESS|
- MAGIC_NO_CHECK_ELF|MAGIC_NO_CHECK_TAR;
-#ifdef MAGIC_NO_CHECK_CDF
- magic_flags |= MAGIC_NO_CHECK_CDF;
-#endif
-#ifdef MAGIC_NO_CHECK_ENCODING
- magic_flags |= MAGIC_NO_CHECK_ENCODING;
-#endif
-#ifdef MAGIC_NO_CHECK_TAR
- magic_flags |= MAGIC_NO_CHECK_TAR;
-#endif
-#ifdef MAGIC_NO_CHECK_TEXT
- magic_flags |= MAGIC_NO_CHECK_TEXT;
-#endif
-#ifdef MAGIC_NO_CHECK_TOKENS
- magic_flags |= MAGIC_NO_CHECK_TOKENS;
-#endif
-#ifdef MAGIC_NO_CHECK_JSON
- magic_flags |= MAGIC_NO_CHECK_JSON;
-#endif
- ctx->libmagic = magic_open (magic_flags);
ctx->local_addrs = rspamd_inet_library_init ();
REF_INIT_RETAIN (ctx, rspamd_deinit_libs);
@@ -2473,10 +2444,6 @@ rspamd_config_libs (struct rspamd_external_libs_ctx *ctx,
}
}
- if (ctx->libmagic) {
- magic_load (ctx->libmagic, cfg->magic_file);
- }
-
rspamd_free_zstd_dictionary (ctx->in_dict);
rspamd_free_zstd_dictionary (ctx->out_dict);
@@ -2586,10 +2553,6 @@ void
rspamd_deinit_libs (struct rspamd_external_libs_ctx *ctx)
{
if (ctx != NULL) {
- if (ctx->libmagic) {
- magic_close (ctx->libmagic);
- }
-
g_free (ctx->ottery_cfg);
#ifdef HAVE_OPENSSL
diff --git a/src/rspamd.h b/src/rspamd.h
index 0a0fb45fc..ea11965fb 100644
--- a/src/rspamd.h
+++ b/src/rspamd.h
@@ -33,8 +33,6 @@
#include "libserver/task.h"
#include <openssl/ssl.h>
-#include <magic.h>
-
/* Default values */
#define FIXED_CONFIG_FILE RSPAMD_CONFDIR "/rspamd.conf"
@@ -353,7 +351,6 @@ struct zstd_dictionary {
struct rspamd_radix_map_helper;
struct rspamd_external_libs_ctx {
- magic_t libmagic;
struct rspamd_radix_map_helper **local_addrs;
struct rspamd_cryptobox_library_ctx *crypto_ctx;
struct ottery_config *ottery_cfg;