From f08fd055ce127140554d3a50f4feffdf0c803154 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 6 Mar 2017 12:36:45 +0000 Subject: [PATCH] [Feature] Enhance text_part:get_content method This method now supports optional `type` attribute which can be one of the following: - `content` (default): utf8 content with HTML tags stripped and newlines preserved - `content_oneline`: utf8 content with HTML tags and newlines stripped - `raw`: raw content, not mime decoded nor utf8 converted - `raw_parsed`: raw content, mime decoded, not utf8 converted - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines) --- src/libmime/message.c | 2 ++ src/libmime/message.h | 1 + src/lua/lua_mimepart.c | 48 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index a4f3be5ca..40769037b 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -497,6 +497,7 @@ rspamd_message_process_text_part (struct rspamd_task *task, &text_part->exceptions, task->urls, task->emails); + text_part->utf_raw_content = part_content; if (text_part->content->len == 0) { text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY; @@ -526,6 +527,7 @@ rspamd_message_process_text_part (struct rspamd_task *task, text_part->content = rspamd_mime_text_part_maybe_convert (task, text_part); + text_part->utf_raw_content = text_part->content; if (text_part->content != NULL) { /* diff --git a/src/libmime/message.h b/src/libmime/message.h index 8c0f919ea..15fcfcccc 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -82,6 +82,7 @@ struct rspamd_mime_text_part { rspamd_ftok_t raw; rspamd_ftok_t parsed; GByteArray *content; + GByteArray *utf_raw_content; GByteArray *stripped_content; GPtrArray *newlines; /**< positions of newlines in text */ struct html_content *html; diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 6d17c3a66..1ff3dbd58 100644 --- 
a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -46,8 +46,13 @@ end */ LUA_FUNCTION_DEF (textpart, is_utf); /*** - * @method text_part:get_content() - * Get the text of the part (html tags stripped) + * @method text_part:get_content([type]) + * Get the text of the part (html tags stripped). Optional `type` defines type of content to get: + * - `content` (default): utf8 content with HTML tags stripped and newlines preserved + * - `content_oneline`: utf8 content with HTML tags and newlines stripped + * - `raw`: raw content, not mime decoded nor utf8 converted + * - `raw_parsed`: raw content, mime decoded, not utf8 converted + * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines) * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string) */ LUA_FUNCTION_DEF (textpart, get_content); @@ -354,16 +359,51 @@ lua_textpart_get_content (lua_State * L) { struct rspamd_mime_text_part *part = lua_check_textpart (L); struct rspamd_lua_text *t; + gsize len; + const gchar *start, *type = NULL; if (part == NULL || IS_PART_EMPTY (part)) { lua_pushnil (L); return 1; } + if (lua_type (L, 2) == LUA_TSTRING) { + type = lua_tostring (L, 2); + } + t = lua_newuserdata (L, sizeof (*t)); rspamd_lua_setclass (L, "rspamd{text}", -1); - t->start = part->content->data; - t->len = part->content->len; + + if (!type) { + start = part->content->data; + len = part->content->len; + } + else if (strcmp (type, "content") == 0) { + start = part->content->data; + len = part->content->len; + } + else if (strcmp (type, "content_oneline") == 0) { + start = part->stripped_content->data; + len = part->stripped_content->len; + } + else if (strcmp (type, "raw_parsed") == 0) { + start = part->parsed.begin; + len = part->parsed.len; + } + else if (strcmp (type, "raw_utf") == 0) { + start = part->utf_raw_content->data; + len = part->utf_raw_content->len; + } + else if (strcmp (type, "raw") == 0) { + start = part->raw.begin; + len = 
part->raw.len; + } + else { + return luaL_error (L, "invalid content type: %s", type); + } + + t->start = start; + t->len = len; t->flags = 0; return 1; -- 2.39.5