diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-16 15:36:54 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-16 15:36:54 +0000 |
commit | e0fd3d254d0f026ec7571069ee7bca77bbd9f4f3 (patch) | |
tree | 1916265a3906aebfb8bb5b97bf54d693028d7e25 | |
parent | 3c4fbc745b1f34ad505749f5110722d7a13f0cee (diff) | |
download | rspamd-e0fd3d254d0f026ec7571069ee7bca77bbd9f4f3.tar.gz rspamd-e0fd3d254d0f026ec7571069ee7bca77bbd9f4f3.zip |
[Minor] Add methods to extract real charset from text parts
-rw-r--r-- | src/libmime/mime_encoding.c | 6 | ||||
-rw-r--r-- | src/lua/lua_mimepart.c | 30 |
2 files changed, 36 insertions, 0 deletions
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index 8788403f3..c30cbe3e3 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -690,6 +690,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, } checked = TRUE; + text_part->real_charset = charset; } else { SET_PART_UTF (text_part); @@ -697,6 +698,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, rspamd_mime_text_part_ucs_from_utf (task, text_part); rspamd_mime_text_part_normalise (task, text_part); rspamd_mime_text_part_maybe_renormalise (task, text_part); + text_part->real_charset = UTF8_CHARSET; return; } @@ -710,6 +712,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len)); msg_info_task ("detected charset: %s", charset); checked = TRUE; + text_part->real_charset = charset; } } @@ -730,6 +733,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, rspamd_mime_text_part_ucs_from_utf (task, text_part); rspamd_mime_text_part_normalise (task, text_part); rspamd_mime_text_part_maybe_renormalise (task, text_part); + text_part->real_charset = UTF8_CHARSET; return; } @@ -748,6 +752,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, text_part->utf_raw_content = part_content; return; } + + text_part->real_charset = charset; } SET_PART_UTF (text_part); diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 33d1a3011..5a1bd0860 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -162,6 +162,13 @@ LUA_FUNCTION_DEF (textpart, get_html); * @return {string} short abbreviation (such as `ru`) for the script's language */ LUA_FUNCTION_DEF (textpart, get_language); + +/*** + * @method text_part:get_charset() + * Returns part real charset + * @return {string} charset of the part + */ +LUA_FUNCTION_DEF (textpart, get_charset); /*** * @method text_part:get_languages() * Returns array of tables of all languages detected for a part: @@ -205,6 +212,7 @@ static const struct luaL_reg textpartlib_m[] = { LUA_INTERFACE_DEF (textpart, is_html), LUA_INTERFACE_DEF (textpart, get_html), LUA_INTERFACE_DEF (textpart, get_language), + LUA_INTERFACE_DEF (textpart, get_charset), LUA_INTERFACE_DEF (textpart, get_languages), LUA_INTERFACE_DEF (textpart, get_mimepart), LUA_INTERFACE_DEF (textpart, get_stats), @@ -842,6 +850,28 @@ lua_textpart_get_language (lua_State * L) } static gint +lua_textpart_get_charset (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_text_part *part = lua_check_textpart (L); + + if (part != NULL) { + if (part->real_charset != NULL) { + lua_pushstring (L, part->real_charset); + return 1; + } + else { + lua_pushnil (L); + } + } + else { + return luaL_error (L, "invalid arguments"); + } + + return 1; +} + +static gint lua_textpart_get_languages (lua_State * L) { LUA_TRACE_POINT; |