From e0fd3d254d0f026ec7571069ee7bca77bbd9f4f3 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 16 Nov 2018 15:36:54 +0000
Subject: [PATCH] [Minor] Add methods to extract real charset from text parts

---
 src/libmime/mime_encoding.c |  6 ++++++
 src/lua/lua_mimepart.c      | 30 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 8788403f3..c30cbe3e3 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -690,6 +690,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 			}
 
 			checked = TRUE;
+			text_part->real_charset = charset;
 		}
 		else {
 			SET_PART_UTF (text_part);
@@ -697,6 +698,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 			rspamd_mime_text_part_ucs_from_utf (task, text_part);
 			rspamd_mime_text_part_normalise (task, text_part);
 			rspamd_mime_text_part_maybe_renormalise (task, text_part);
+			text_part->real_charset = UTF8_CHARSET;
 
 			return;
 		}
@@ -710,6 +712,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 						MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
 				msg_info_task ("detected charset: %s", charset);
 				checked = TRUE;
+				text_part->real_charset = charset;
 			}
 		}
 
@@ -730,6 +733,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 		rspamd_mime_text_part_ucs_from_utf (task, text_part);
 		rspamd_mime_text_part_normalise (task, text_part);
 		rspamd_mime_text_part_maybe_renormalise (task, text_part);
+		text_part->real_charset = UTF8_CHARSET;
 
 		return;
 	}
@@ -748,6 +752,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 			text_part->utf_raw_content = part_content;
 			return;
 		}
+
+		text_part->real_charset = charset;
 	}
 
 	SET_PART_UTF (text_part);
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 33d1a3011..5a1bd0860 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -162,6 +162,13 @@ LUA_FUNCTION_DEF (textpart, get_html);
  * @return {string} short abbreviation (such as `ru`) for the script's language
  */
 LUA_FUNCTION_DEF (textpart, get_language);
+
+/***
+ * @method text_part:get_charset()
+ * Returns the real charset of the part
+ * @return {string} charset of the part, or nil if no real charset is set
+ */
+LUA_FUNCTION_DEF (textpart, get_charset);
 /***
  * @method text_part:get_languages()
  * Returns array of tables of all languages detected for a part:
@@ -205,6 +212,7 @@ static const struct luaL_reg textpartlib_m[] = {
 	LUA_INTERFACE_DEF (textpart, is_html),
 	LUA_INTERFACE_DEF (textpart, get_html),
 	LUA_INTERFACE_DEF (textpart, get_language),
+	LUA_INTERFACE_DEF (textpart, get_charset),
 	LUA_INTERFACE_DEF (textpart, get_languages),
 	LUA_INTERFACE_DEF (textpart, get_mimepart),
 	LUA_INTERFACE_DEF (textpart, get_stats),
@@ -841,6 +849,28 @@ lua_textpart_get_language (lua_State * L)
 	return 1;
 }
 
+static gint
+lua_textpart_get_charset (lua_State * L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_mime_text_part *part = lua_check_textpart (L);
+
+	if (part != NULL) {
+		if (part->real_charset != NULL) {
+			lua_pushstring (L, part->real_charset);
+			return 1;
+		}
+		else {
+			lua_pushnil (L);
+		}
+	}
+	else {
+		return luaL_error (L, "invalid arguments");
+	}
+
+	return 1;
+}
+
 static gint
 lua_textpart_get_languages (lua_State * L)
 {
-- 
2.39.5