aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-03-06 12:36:45 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-03-06 12:38:28 +0000
commitf08fd055ce127140554d3a50f4feffdf0c803154 (patch)
tree23a762126d2693907da519f49ff63d2b0f144be6 /src
parent4240400bc04d6c06ce5e1cebdbd77441ff2fb35f (diff)
downloadrspamd-f08fd055ce127140554d3a50f4feffdf0c803154.tar.gz
rspamd-f08fd055ce127140554d3a50f4feffdf0c803154.zip
[Feature] Enhance text_part:get_content method
This method now supports an optional `type` attribute, which can be one of the following: - `content` (default): utf8 content with HTML tags stripped and newlines preserved - `content_oneline`: utf8 content with HTML tags and newlines stripped - `raw`: raw content, not mime decoded nor utf8 converted - `raw_parsed`: raw content, mime decoded, not utf8 converted - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
Diffstat (limited to 'src')
-rw-r--r--src/libmime/message.c2
-rw-r--r--src/libmime/message.h1
-rw-r--r--src/lua/lua_mimepart.c48
3 files changed, 47 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a4f3be5ca..40769037b 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -497,6 +497,7 @@ rspamd_message_process_text_part (struct rspamd_task *task,
&text_part->exceptions,
task->urls,
task->emails);
+ text_part->utf_raw_content = part_content;
if (text_part->content->len == 0) {
text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
@@ -526,6 +527,7 @@ rspamd_message_process_text_part (struct rspamd_task *task,
text_part->content = rspamd_mime_text_part_maybe_convert (task,
text_part);
+ text_part->utf_raw_content = text_part->content;
if (text_part->content != NULL) {
/*
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 8c0f919ea..15fcfcccc 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -82,6 +82,7 @@ struct rspamd_mime_text_part {
rspamd_ftok_t raw;
rspamd_ftok_t parsed;
GByteArray *content;
+ GByteArray *utf_raw_content;
GByteArray *stripped_content;
GPtrArray *newlines; /**< positions of newlines in text */
struct html_content *html;
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 6d17c3a66..1ff3dbd58 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -46,8 +46,13 @@ end
*/
LUA_FUNCTION_DEF (textpart, is_utf);
/***
- * @method text_part:get_content()
- * Get the text of the part (html tags stripped)
+ * @method text_part:get_content([type])
+ * Get the text of the part (HTML tags are stripped by default). The optional `type` argument selects which content to return:
+ * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
+ * - `content_oneline`: utf8 content with HTML tags and newlines stripped
+ * - `raw`: raw content, not mime decoded nor utf8 converted
+ * - `raw_parsed`: raw content, mime decoded, not utf8 converted
+ * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
* @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
*/
LUA_FUNCTION_DEF (textpart, get_content);
@@ -354,16 +359,51 @@ lua_textpart_get_content (lua_State * L)
{
struct rspamd_mime_text_part *part = lua_check_textpart (L);
struct rspamd_lua_text *t;
+ gsize len;
+ const gchar *start, *type = NULL;
if (part == NULL || IS_PART_EMPTY (part)) {
lua_pushnil (L);
return 1;
}
+ if (lua_type (L, 2) == LUA_TSTRING) {
+ type = lua_tostring (L, 2);
+ }
+
t = lua_newuserdata (L, sizeof (*t));
rspamd_lua_setclass (L, "rspamd{text}", -1);
- t->start = part->content->data;
- t->len = part->content->len;
+
+ if (!type) {
+ start = part->content->data;
+ len = part->content->len;
+ }
+ else if (strcmp (type, "content") == 0) {
+ start = part->content->data;
+ len = part->content->len;
+ }
+ else if (strcmp (type, "content_oneline") == 0) {
+ start = part->stripped_content->data;
+ len = part->stripped_content->len;
+ }
+ else if (strcmp (type, "raw_parsed") == 0) {
+ start = part->parsed.begin;
+ len = part->parsed.len;
+ }
+ else if (strcmp (type, "raw_utf") == 0) {
+ start = part->utf_raw_content->data;
+ len = part->utf_raw_content->len;
+ }
+ else if (strcmp (type, "raw") == 0) {
+ start = part->raw.begin;
+ len = part->raw.len;
+ }
+ else {
+ return luaL_error (L, "invalid content type: %s", type);
+ }
+
+ t->start = start;
+ t->len = len;
t->flags = 0;
return 1;