[Feature] Enhance text_part:get_content method

This method now supports an optional `type` attribute, which can be one of the following: - `content` (default): utf8 content with HTML tags stripped and newlines preserved - `content_oneline`: utf8 content with HTML tags and newlines stripped - `raw`: raw content, not mime decoded nor utf8 converted - `raw_parsed`: raw content, mime decoded, not utf8 converted - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-03-06 12:36:45 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-03-06 12:38:28 +0000
commit: f08fd055ce127140554d3a50f4feffdf0c803154 (patch)
tree: 23a762126d2693907da519f49ff63d2b0f144be6 /src
parent: 4240400bc04d6c06ce5e1cebdbd77441ff2fb35f (diff)
download: rspamd-f08fd055ce127140554d3a50f4feffdf0c803154.tar.gz
rspamd-f08fd055ce127140554d3a50f4feffdf0c803154.zip
3 files changed, 47 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a4f3be5ca..40769037b 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -497,6 +497,7 @@ rspamd_message_process_text_part (struct rspamd_task *task,
 				&text_part->exceptions,
 				task->urls,
 				task->emails);
+		text_part->utf_raw_content = part_content;
 
 		if (text_part->content->len == 0) {
 			text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
@@ -526,6 +527,7 @@ rspamd_message_process_text_part (struct rspamd_task *task,
 
 		text_part->content = rspamd_mime_text_part_maybe_convert (task,
 				text_part);
+		text_part->utf_raw_content = text_part->content;
 
 		if (text_part->content != NULL) {
 			/*
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 8c0f919ea..15fcfcccc 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -82,6 +82,7 @@ struct rspamd_mime_text_part {
 	rspamd_ftok_t raw;
 	rspamd_ftok_t parsed;
 	GByteArray *content;
+	GByteArray *utf_raw_content;
 	GByteArray *stripped_content;
 	GPtrArray *newlines;	/**< positions of newlines in text					*/
 	struct html_content *html;
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 6d17c3a66..1ff3dbd58 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -46,8 +46,13 @@ end
  */
 LUA_FUNCTION_DEF (textpart, is_utf);
 /***
- * @method text_part:get_content()
- * Get the text of the part (html tags stripped)
+ * @method text_part:get_content([type])
+ * Get the text of the part (html tags stripped). Optional `type` defines type of content to get:
+ * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
+ * - `content_oneline`: utf8 content with HTML tags and newlines stripped
+ * - `raw`: raw content, not mime decoded nor utf8 converted
+ * - `raw_parsed`: raw content, mime decoded, not utf8 converted
+ * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
  * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
  */
 LUA_FUNCTION_DEF (textpart, get_content);
@@ -354,16 +359,51 @@ lua_textpart_get_content (lua_State * L)
 {
 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
 	struct rspamd_lua_text *t;
+	gsize len;
+	const gchar *start, *type = NULL;
 
 	if (part == NULL || IS_PART_EMPTY (part)) {
 		lua_pushnil (L);
 		return 1;
 	}
 
+	if (lua_type (L, 2) == LUA_TSTRING) {
+		type = lua_tostring (L, 2);
+	}
+
 	t = lua_newuserdata (L, sizeof (*t));
 	rspamd_lua_setclass (L, "rspamd{text}", -1);
-	t->start = part->content->data;
-	t->len = part->content->len;
+
+	if (!type) {
+		start = part->content->data;
+		len = part->content->len;
+	}
+	else if (strcmp (type, "content") == 0) {
+		start = part->content->data;
+		len = part->content->len;
+	}
+	else if (strcmp (type, "content_oneline") == 0) {
+		start = part->stripped_content->data;
+		len = part->stripped_content->len;
+	}
+	else if (strcmp (type, "raw_parsed") == 0) {
+		start = part->parsed.begin;
+		len = part->parsed.len;
+	}
+	else if (strcmp (type, "raw_utf") == 0) {
+		start = part->utf_raw_content->data;
+		len = part->utf_raw_content->len;
+	}
+	else if (strcmp (type, "raw") == 0) {
+		start = part->raw.begin;
+		len = part->raw.len;
+	}
+	else {
+		return luaL_error (L, "invalid content type: %s", type);
+	}
+
+	t->start = start;
+	t->len = len;
 	t->flags = 0;
 
 	return 1;
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2017-03-06 12:36:45 +0000
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2017-03-06 12:38:28 +0000
commit	f08fd055ce127140554d3a50f4feffdf0c803154 (patch)
tree	23a762126d2693907da519f49ff63d2b0f144be6 /src
parent	4240400bc04d6c06ce5e1cebdbd77441ff2fb35f (diff)
download	rspamd-f08fd055ce127140554d3a50f4feffdf0c803154.tar.gz rspamd-f08fd055ce127140554d3a50f4feffdf0c803154.zip