source.dussan.org Git - rspamd.git/commitdiff
[Minor] Add methods to extract real charset from text parts
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 16 Nov 2018 15:36:54 +0000 (15:36 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 16 Nov 2018 15:36:54 +0000 (15:36 +0000)
src/libmime/mime_encoding.c
src/lua/lua_mimepart.c

index 8788403f3ed7e4af785ee0bc094ba40dbe7f9789..c30cbe3e3b8f4b529181a5094a86e490cef996a4 100644 (file)
@@ -690,6 +690,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                        }
 
                        checked = TRUE;
+                       text_part->real_charset = charset;
                }
                else {
                        SET_PART_UTF (text_part);
@@ -697,6 +698,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                        rspamd_mime_text_part_ucs_from_utf (task, text_part);
                        rspamd_mime_text_part_normalise (task, text_part);
                        rspamd_mime_text_part_maybe_renormalise (task, text_part);
+                       text_part->real_charset = UTF8_CHARSET;
 
                        return;
                }
@@ -710,6 +712,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                                        MIN (RSPAMD_CHARSET_MAX_CONTENT, part_content->len));
                        msg_info_task ("detected charset: %s", charset);
                        checked = TRUE;
+                       text_part->real_charset = charset;
                }
        }
 
@@ -730,6 +733,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                rspamd_mime_text_part_ucs_from_utf (task, text_part);
                rspamd_mime_text_part_normalise (task, text_part);
                rspamd_mime_text_part_maybe_renormalise (task, text_part);
+               text_part->real_charset = UTF8_CHARSET;
 
                return;
        }
@@ -748,6 +752,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
                        text_part->utf_raw_content = part_content;
                        return;
                }
+
+               text_part->real_charset = charset;
        }
 
        SET_PART_UTF (text_part);
index 33d1a3011d8371b7100f7468fe9a4b3857547a45..5a1bd0860dcc66aaa826f7847e6b2fb553ac6c2d 100644 (file)
@@ -162,6 +162,13 @@ LUA_FUNCTION_DEF (textpart, get_html);
  * @return {string} short abbreviation (such as `ru`) for the script's language
  */
 LUA_FUNCTION_DEF (textpart, get_language);
+
+/***
+ * @method text_part:get_charset()
+ * Returns the real charset of the text part
+ * @return {string} charset of the part
+ */
+LUA_FUNCTION_DEF (textpart, get_charset);
 /***
  * @method text_part:get_languages()
  * Returns array of tables of all languages detected for a part:
@@ -205,6 +212,7 @@ static const struct luaL_reg textpartlib_m[] = {
        LUA_INTERFACE_DEF (textpart, is_html),
        LUA_INTERFACE_DEF (textpart, get_html),
        LUA_INTERFACE_DEF (textpart, get_language),
+       LUA_INTERFACE_DEF (textpart, get_charset),
        LUA_INTERFACE_DEF (textpart, get_languages),
        LUA_INTERFACE_DEF (textpart, get_mimepart),
        LUA_INTERFACE_DEF (textpart, get_stats),
@@ -841,6 +849,28 @@ lua_textpart_get_language (lua_State * L)
        return 1;
 }
 
+/* Lua binding: pushes the part's real charset string, or nil when unset */
+static gint
+lua_textpart_get_charset (lua_State * L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_mime_text_part *tp = lua_check_textpart (L);
+
+       if (tp == NULL) {
+               return luaL_error (L, "invalid arguments");
+       }
+
+       if (tp->real_charset == NULL) {
+               lua_pushnil (L);
+       }
+       else {
+               lua_pushstring (L, tp->real_charset);
+       }
+
+       /* Exactly one value (string or nil) is on the Lua stack */
+       return 1;
+}
+
 static gint
 lua_textpart_get_languages (lua_State * L)
 {